[compiler-rt] r254678 - [PGO] Unify VP data format between raw and indexed profile (runtime)

Xinliang David Li via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 3 17:02:24 PST 2015


Author: davidxl
Date: Thu Dec  3 19:02:24 2015
New Revision: 254678

URL: http://llvm.org/viewvc/llvm-project?rev=254678&view=rev
Log:
[PGO] Unify VP data format between raw and indexed profile (runtime)

With the latest refactoring and code sharing patches landed,
it is possible to unify the value profile implementation between
raw and indexed profile. This is the part in the profile runtime.

Differential Revision: http://reviews.llvm.org/D15057

Added:
    compiler-rt/trunk/test/profile/instrprof-value-prof.c
Modified:
    compiler-rt/trunk/lib/profile/InstrProfiling.c

Modified: compiler-rt/trunk/lib/profile/InstrProfiling.c
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/profile/InstrProfiling.c?rev=254678&r1=254677&r2=254678&view=diff
==============================================================================
--- compiler-rt/trunk/lib/profile/InstrProfiling.c (original)
+++ compiler-rt/trunk/lib/profile/InstrProfiling.c Thu Dec  3 19:02:24 2015
@@ -9,8 +9,19 @@
 
 #include "InstrProfiling.h"
 #include <limits.h>
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#define INSTR_PROF_VALUE_PROF_DATA
+#define INSTR_PROF_COMMON_API_IMPL
+#include "InstrProfData.inc"
+
+#define PROF_OOM(Msg) PROF_ERR(Msg ":%s\n", "Out of memory");
+#define PROF_OOM_RETURN(Msg)                                                   \
+  {                                                                            \
+    PROF_OOM(Msg)                                                              \
+    return 0;                                                                  \
+  }
 
 LLVM_LIBRARY_VISIBILITY uint64_t __llvm_profile_get_magic(void) {
   return sizeof(void *) == sizeof(uint64_t) ? (INSTR_PROF_RAW_MAGIC_64)
@@ -60,20 +71,29 @@ LLVM_LIBRARY_VISIBILITY void __llvm_prof
   }
 }
 
-/* Total number of value profile data in bytes. */
-static uint64_t TotalValueDataSize = 0;
-
-#ifdef _MIPS_ARCH
+/* This method is only used in value profiler mock testing.  */
 LLVM_LIBRARY_VISIBILITY void
-__llvm_profile_instrument_target(uint64_t TargetValue, void *Data_,
-                                 uint32_t CounterIndex) {}
+__llvm_profile_set_num_value_sites(__llvm_profile_data *Data,
+                                   uint32_t ValueKind, uint16_t NumValueSites) {
+  *((uint16_t *)&Data->NumValueSites[ValueKind]) = NumValueSites;
+}
+
+/* This method is only used in value profiler mock testing.  */
+LLVM_LIBRARY_VISIBILITY const __llvm_profile_data *
+__llvm_profile_iterate_data(const __llvm_profile_data *Data) {
+  return Data + 1;
+}
 
-#else
+/* This method is only used in value profiler mock testing.  */
+LLVM_LIBRARY_VISIBILITY void *
+__llvm_get_function_addr(const __llvm_profile_data *Data) {
+  return Data->FunctionPointer;
+}
 
 /* Allocate an array that holds the pointers to the linked lists of
  * value profile counter nodes. The number of element of the array
  * is the total number of value profile sites instrumented. Returns
- *  0 if allocation fails.
+ * 0 if allocation fails.
  */
 
 static int allocateValueProfileCounters(__llvm_profile_data *Data) {
@@ -90,16 +110,27 @@ static int allocateValueProfileCounters(
     free(Mem);
     return 0;
   }
-  /*  In the raw format, there will be an value count array preceding
-   *  the value profile data. The element type of the array is uint8_t,
-   *  and there is one element in array per value site. The element
-   *  stores the number of values profiled for the corresponding site.
-   */
-  uint8_t Padding = __llvm_profile_get_num_padding_bytes(NumVSites);
-  __sync_fetch_and_add(&TotalValueDataSize, NumVSites + Padding);
   return 1;
 }
 
+static void deallocateValueProfileCounters(__llvm_profile_data *Data) {
+  uint64_t NumVSites = 0, I;
+  uint32_t VKI;
+  if (!Data->Values)
+    return;
+  for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI)
+    NumVSites += Data->NumValueSites[VKI];
+  for (I = 0; I < NumVSites; I++) {
+    ValueProfNode *Node = ((ValueProfNode **)Data->Values)[I];
+    while (Node) {
+      ValueProfNode *Next = Node->Next;
+      free(Node);
+      Node = Next;
+    }
+  }
+  free(Data->Values);
+}
+
 LLVM_LIBRARY_VISIBILITY void
 __llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
                                  uint32_t CounterIndex) {
@@ -149,61 +180,87 @@ __llvm_profile_instrument_target(uint64_
     free(CurrentVNode);
     return;
   }
-  __sync_fetch_and_add(&TotalValueDataSize, Success * sizeof(ValueProfNode));
 }
-#endif
+
+/* For multi-threaded programs, while the profile is being dumped, other
+   threads may still be updating the value profile data and creating new
+   value entries. To accommodate this, we need to add extra bytes to the
+   data buffer. The size of the extra space is controlled by an environment
+   variable. */
+static unsigned getVprofExtraBytes() {
+  const char *ExtraStr = getenv("LLVM_VALUE_PROF_BUFFER_EXTRA");
+  if (!ExtraStr || !ExtraStr[0])
+    return 1024;
+  return (unsigned)atoi(ExtraStr);
+}
 
 LLVM_LIBRARY_VISIBILITY uint64_t
 __llvm_profile_gather_value_data(uint8_t **VDataArray) {
+  size_t S = 0, RealSize = 0, BufferCapacity = 0, Extra = 0;
+  __llvm_profile_data *I;
+  if (!VDataArray)
+    PROF_OOM_RETURN("Failed to write value profile data ");
 
-  if (!VDataArray || 0 == TotalValueDataSize)
-    return 0;
-
-  uint64_t NumData = TotalValueDataSize;
-  *VDataArray = (uint8_t *)calloc(NumData, sizeof(uint8_t));
-  if (!*VDataArray)
-    return 0;
-
-  uint8_t *VDataEnd = *VDataArray + NumData;
-  uint8_t *PerSiteCountsHead = *VDataArray;
   const __llvm_profile_data *DataEnd = __llvm_profile_end_data();
   const __llvm_profile_data *DataBegin = __llvm_profile_begin_data();
-  __llvm_profile_data *I;
+
+  /*
+   * Compute the total Size of the buffer to hold ValueProfData
+   * structures for functions with value profile data.
+   */
   for (I = (__llvm_profile_data *)DataBegin; I != DataEnd; ++I) {
+    ValueProfRuntimeRecord R;
+    /* Extract the value profile data info from the runtime. */
+    if (initializeValueProfRuntimeRecord(&R, I->NumValueSites, I->Values))
+      PROF_OOM_RETURN("Failed to write value profile data ");
+    /* Compute the size of ValueProfData from this runtime record.  */
+    if (getNumValueKindsRT(&R) != 0)
+      S += getValueProfDataSizeRT(&R);
+    finalizeValueProfRuntimeRecord(&R);
+  }
+  /* No value sites or no value profile data is collected. */
+  if (!S)
+    return 0;
 
-    uint64_t NumVSites = 0;
-    uint32_t VKI, i;
+  Extra = getVprofExtraBytes();
+  BufferCapacity = S + Extra;
+  *VDataArray = calloc(BufferCapacity, sizeof(uint8_t));
+  if (!*VDataArray)
+    PROF_OOM_RETURN("Failed to write value profile data ");
 
-    if (!I->Values)
+  ValueProfData *VD = (ValueProfData *)(*VDataArray);
+  /*
+   * Extract value profile data and write into ValueProfData structure
+   * one by one. Note that new value profile data added to any value
+   * site (from another thread) after the ValueProfRuntimeRecord is
+   * initialized (when the profile data snapshot is taken) won't be
+   * collected. This is not a problem as those dropped value will have
+   * very low taken count.
+   */
+  for (I = (__llvm_profile_data *)DataBegin; I != DataEnd; ++I) {
+    ValueProfRuntimeRecord R;
+    if (initializeValueProfRuntimeRecord(&R, I->NumValueSites, I->Values))
+      PROF_OOM_RETURN("Failed to write value profile data ");
+    if (getNumValueKindsRT(&R) == 0)
       continue;
 
-    ValueProfNode **ValueCounters = (ValueProfNode **)I->Values;
-
-    for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI)
-      NumVSites += I->NumValueSites[VKI];
-    uint8_t Padding = __llvm_profile_get_num_padding_bytes(NumVSites);
-
-    uint8_t *PerSiteCountPtr = PerSiteCountsHead;
-    InstrProfValueData *VDataPtr =
-        (InstrProfValueData *)(PerSiteCountPtr + NumVSites + Padding);
-
-    for (i = 0; i < NumVSites; ++i) {
-
-      ValueProfNode *VNode = ValueCounters[i];
-
-      uint8_t VDataCount = 0;
-      while (VNode && ((uint8_t *)(VDataPtr + 1) <= VDataEnd)) {
-        *VDataPtr = VNode->VData;
-        VNode = VNode->Next;
-        ++VDataPtr;
-        if (++VDataCount == UCHAR_MAX)
-          break;
-      }
-      *PerSiteCountPtr = VDataCount;
-      ++PerSiteCountPtr;
+    /* Record R has taken a snapshot of the VP data at this point. Newly
+       added VP data for this function will be dropped.  */
+    /* Check if there is enough space.  */
+    if (BufferCapacity - RealSize < getValueProfDataSizeRT(&R)) {
+      PROF_ERR("Value profile data is dropped :%s \n",
+               "Out of buffer space. Use environment "
+               " LLVM_VALUE_PROF_BUFFER_EXTRA to allocate more");
+      I->Values = 0;
     }
-    I->Values = (void *)PerSiteCountsHead;
-    PerSiteCountsHead = (uint8_t *)VDataPtr;
+
+    serializeValueProfDataFromRT(&R, VD);
+    deallocateValueProfileCounters(I);
+    I->Values = VD;
+    finalizeValueProfRuntimeRecord(&R);
+    RealSize += VD->TotalSize;
+    VD = (ValueProfData *)((char *)VD + VD->TotalSize);
   }
-  return PerSiteCountsHead - *VDataArray;
+
+  return RealSize;
 }

Added: compiler-rt/trunk/test/profile/instrprof-value-prof.c
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/profile/instrprof-value-prof.c?rev=254678&view=auto
==============================================================================
--- compiler-rt/trunk/test/profile/instrprof-value-prof.c (added)
+++ compiler-rt/trunk/test/profile/instrprof-value-prof.c Thu Dec  3 19:02:24 2015
@@ -0,0 +1,183 @@
+// RUN: %clang_profgen -O2 -o %t %s
+// RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t 1
+// RUN: env LLVM_PROFILE_FILE=%t-2.profraw %run %t
+// RUN: llvm-profdata merge -o %t.profdata %t.profraw
+// RUN: llvm-profdata merge -o %t-2.profdata %t-2.profraw
+// RUN: llvm-profdata merge -o %t-merged.profdata %t.profraw %t-2.profdata
+// RUN: llvm-profdata show --all-functions -ic-targets  %t-2.profdata | FileCheck  %s -check-prefix=NO-VALUE
+// RUN: llvm-profdata show --all-functions -ic-targets  %t.profdata | FileCheck  %s
+// Value profile merging currently sorts entries by target value, which destroys the order of the targets
+// in the list and causes comparison problems. For now just check a small subset of the output.
+// RUN: llvm-profdata show --all-functions -ic-targets  %t-merged.profdata | FileCheck  %s -check-prefix=MERGE
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+typedef struct __llvm_profile_data __llvm_profile_data;
+const __llvm_profile_data *__llvm_profile_begin_data(void);
+const __llvm_profile_data *__llvm_profile_end_data(void);
+void __llvm_profile_set_num_value_sites(__llvm_profile_data *Data,
+                                        uint32_t ValueKind,
+                                        uint16_t NumValueSites);
+__llvm_profile_data *
+__llvm_profile_iterate_data(const __llvm_profile_data *Data);
+void *__llvm_get_function_addr(const __llvm_profile_data *Data);
+void __llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
+                                      uint32_t CounterIndex);
+
+#define DEF_FUNC(x)                                                            \
+  void x() {}
+#define DEF_2_FUNCS(x) DEF_FUNC(x##_1) DEF_FUNC(x##_2)
+#define DEF_4_FUNCS(x) DEF_2_FUNCS(x##_1) DEF_2_FUNCS(x##_2)
+#define DEF_8_FUNCS(x) DEF_4_FUNCS(x##_1) DEF_4_FUNCS(x##_2)
+#define DEF_16_FUNCS(x) DEF_8_FUNCS(x##_1) DEF_8_FUNCS(x##_2)
+#define DEF_32_FUNCS(x) DEF_16_FUNCS(x##_1) DEF_16_FUNCS(x##_2)
+#define DEF_64_FUNCS(x) DEF_32_FUNCS(x##_1) DEF_32_FUNCS(x##_2)
+#define DEF_128_FUNCS(x) DEF_64_FUNCS(x##_1) DEF_64_FUNCS(x##_2)
+
+#define FUNC_ADDR(x) &x,
+#define FUNC_2_ADDRS(x) FUNC_ADDR(x##_1) FUNC_ADDR(x##_2)
+#define FUNC_4_ADDRS(x) FUNC_2_ADDRS(x##_1) FUNC_2_ADDRS(x##_2)
+#define FUNC_8_ADDRS(x) FUNC_4_ADDRS(x##_1) FUNC_4_ADDRS(x##_2)
+#define FUNC_16_ADDRS(x) FUNC_8_ADDRS(x##_1) FUNC_8_ADDRS(x##_2)
+#define FUNC_32_ADDRS(x) FUNC_16_ADDRS(x##_1) FUNC_16_ADDRS(x##_2)
+#define FUNC_64_ADDRS(x) FUNC_32_ADDRS(x##_1) FUNC_32_ADDRS(x##_2)
+#define FUNC_128_ADDRS(x) FUNC_64_ADDRS(x##_1) FUNC_64_ADDRS(x##_2)
+
+DEF_8_FUNCS(callee)
+DEF_128_FUNCS(caller)
+
+void *CallerAddrs[] = {FUNC_128_ADDRS(caller)};
+
+void *CalleeAddrs[] = {FUNC_8_ADDRS(callee)};
+
+static int cmpaddr(const void *p1, const void *p2) {
+  void *addr1 = *(void **)p1;
+  void *addr2 = *(void **)p2;
+  return (intptr_t)addr2 - (intptr_t)addr1;
+}
+
+int main(int argc, const char *argv[]) {
+  unsigned S, NS = 0, V, doInstrument = 1;
+  const __llvm_profile_data *Data, *DataEnd;
+
+  if (argc < 2)
+    doInstrument = 0;
+
+  qsort(CallerAddrs, sizeof(CallerAddrs) / sizeof(void *), sizeof(void *),
+        cmpaddr);
+
+  /* We will synthesize value profile data for 128 caller functions.
+   * The number of value sites differs per caller (NS, incremented for each
+   * caller found). Each site S gets S % 8 values, i.e. between 0 and 7.  */
+
+  Data = __llvm_profile_begin_data();
+  DataEnd = __llvm_profile_end_data();
+
+  for (; Data < DataEnd; Data = __llvm_profile_iterate_data(Data)) {
+    void *func = __llvm_get_function_addr(Data);
+    if (bsearch(&func, CallerAddrs, sizeof(CallerAddrs) / sizeof(void *),
+                sizeof(void *), cmpaddr)) {
+      __llvm_profile_set_num_value_sites((__llvm_profile_data *)Data,
+                                         0 /*IPVK_IndirectCallTarget */, NS);
+      if (!doInstrument) {
+        NS++;
+        continue;
+      }
+      for (S = 0; S < NS; S++) {
+        for (V = 0; V < S % 8; V++) {
+          unsigned C;
+          for (C = 0; C < V + 1; C++)
+            __llvm_profile_instrument_target((uint64_t)CalleeAddrs[V],
+                                             (void *)Data, S);
+        }
+      }
+      NS++;
+    }
+  }
+}
+
+// NO-VALUE: Indirect Call Site Count: 127
+// NO-VALUE-NEXT: Indirect Target Results:
+// MERGE: Indirect Call Site Count: 127
+// MERGE-NEXT: Indirect Target Results:
+// MERGE-NEXT:  [ 1, callee_1_1_1, 1 ]
+// CHECK: Indirect Call Site Count: 127
+// CHECK-NEXT: Indirect Target Results:
+// CHECK-NEXT:  [ 1, callee_1_1_1, 1 ]
+// CHECK-NEXT:  [ 2, callee_1_1_1, 1 ]
+// CHECK-NEXT:  [ 2, callee_1_1_2, 2 ]
+// CHECK-NEXT:  [ 3, callee_1_1_1, 1 ]
+// CHECK-NEXT:  [ 3, callee_1_1_2, 2 ]
+// CHECK-NEXT:  [ 3, callee_1_2_1, 3 ]
+// CHECK-NEXT:  [ 4, callee_1_1_1, 1 ]
+// CHECK-NEXT:  [ 4, callee_1_1_2, 2 ]
+// CHECK-NEXT:  [ 4, callee_1_2_1, 3 ]
+// CHECK-NEXT:  [ 4, callee_1_2_2, 4 ]
+// CHECK-NEXT:  [ 5, callee_1_1_1, 1 ]
+// CHECK-NEXT:  [ 5, callee_1_1_2, 2 ]
+// CHECK-NEXT:  [ 5, callee_1_2_1, 3 ]
+// CHECK-NEXT:  [ 5, callee_1_2_2, 4 ]
+// CHECK-NEXT:  [ 5, callee_2_1_1, 5 ]
+// CHECK-NEXT:  [ 6, callee_1_1_1, 1 ]
+// CHECK-NEXT:  [ 6, callee_1_1_2, 2 ]
+// CHECK-NEXT:  [ 6, callee_1_2_1, 3 ]
+// CHECK-NEXT:  [ 6, callee_1_2_2, 4 ]
+// CHECK-NEXT:  [ 6, callee_2_1_1, 5 ]
+// CHECK-NEXT:  [ 6, callee_2_1_2, 6 ]
+// CHECK-NEXT:  [ 7, callee_1_1_1, 1 ]
+// CHECK-NEXT:  [ 7, callee_1_1_2, 2 ]
+// CHECK-NEXT:  [ 7, callee_1_2_1, 3 ]
+// CHECK-NEXT:  [ 7, callee_1_2_2, 4 ]
+// CHECK-NEXT:  [ 7, callee_2_1_1, 5 ]
+// CHECK-NEXT:  [ 7, callee_2_1_2, 6 ]
+// CHECK-NEXT:  [ 7, callee_2_2_1, 7 ]
+// CHECK-NEXT:  [ 9, callee_1_1_1, 1 ]
+// CHECK-NEXT:  [ 10, callee_1_1_1, 1 ]
+// CHECK-NEXT:  [ 10, callee_1_1_2, 2 ]
+// CHECK-NEXT:  [ 11, callee_1_1_1, 1 ]
+// CHECK-NEXT:  [ 11, callee_1_1_2, 2 ]
+// CHECK-NEXT:  [ 11, callee_1_2_1, 3 ]
+// CHECK-NEXT:  [ 12, callee_1_1_1, 1 ]
+// CHECK-NEXT:  [ 12, callee_1_1_2, 2 ]
+// CHECK-NEXT:  [ 12, callee_1_2_1, 3 ]
+// CHECK-NEXT:  [ 12, callee_1_2_2, 4 ]
+// CHECK-NEXT:  [ 13, callee_1_1_1, 1 ]
+// CHECK-NEXT:  [ 13, callee_1_1_2, 2 ]
+// CHECK-NEXT:  [ 13, callee_1_2_1, 3 ]
+// CHECK-NEXT:  [ 13, callee_1_2_2, 4 ]
+// CHECK-NEXT:  [ 13, callee_2_1_1, 5 ]
+// CHECK-NEXT:  [ 14, callee_1_1_1, 1 ]
+// CHECK-NEXT:  [ 14, callee_1_1_2, 2 ]
+// CHECK-NEXT:  [ 14, callee_1_2_1, 3 ]
+// CHECK-NEXT:  [ 14, callee_1_2_2, 4 ]
+// CHECK-NEXT:  [ 14, callee_2_1_1, 5 ]
+// CHECK-NEXT:  [ 14, callee_2_1_2, 6 ]
+// CHECK-NEXT:  [ 15, callee_1_1_1, 1 ]
+// CHECK-NEXT:  [ 15, callee_1_1_2, 2 ]
+// CHECK-NEXT:  [ 15, callee_1_2_1, 3 ]
+// CHECK-NEXT:  [ 15, callee_1_2_2, 4 ]
+// CHECK-NEXT:  [ 15, callee_2_1_1, 5 ]
+// CHECK-NEXT:  [ 15, callee_2_1_2, 6 ]
+// CHECK-NEXT:  [ 15, callee_2_2_1, 7 ]
+// CHECK-NEXT:  [ 17, callee_1_1_1, 1 ]
+// CHECK-NEXT:  [ 18, callee_1_1_1, 1 ]
+// CHECK-NEXT:  [ 18, callee_1_1_2, 2 ]
+// CHECK-NEXT:  [ 19, callee_1_1_1, 1 ]
+// CHECK-NEXT:  [ 19, callee_1_1_2, 2 ]
+// CHECK-NEXT:  [ 19, callee_1_2_1, 3 ]
+// CHECK-NEXT:  [ 20, callee_1_1_1, 1 ]
+// CHECK-NEXT:  [ 20, callee_1_1_2, 2 ]
+// CHECK-NEXT:  [ 20, callee_1_2_1, 3 ]
+// CHECK-NEXT:  [ 20, callee_1_2_2, 4 ]
+// CHECK-NEXT:  [ 21, callee_1_1_1, 1 ]
+// CHECK-NEXT:  [ 21, callee_1_1_2, 2 ]
+// CHECK-NEXT:  [ 21, callee_1_2_1, 3 ]
+// CHECK-NEXT:  [ 21, callee_1_2_2, 4 ]
+// CHECK-NEXT:  [ 21, callee_2_1_1, 5 ]
+// CHECK-NEXT:  [ 22, callee_1_1_1, 1 ]
+// CHECK-NEXT:  [ 22, callee_1_1_2, 2 ]
+// CHECK-NEXT:  [ 22, callee_1_2_1, 3 ]
+// CHECK-NEXT:  [ 22, callee_1_2_2, 4 ]
+// CHECK-NEXT:  [ 22, callee_2_1_1, 5 ]
+




More information about the llvm-commits mailing list