[compiler-rt] r254678 - [PGO] Unify VP data format between raw and indexed profile (runtime)
Sean Silva via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 29 14:36:09 PDT 2016
On Thu, Dec 3, 2015 at 5:02 PM, Xinliang David Li via llvm-commits <
llvm-commits at lists.llvm.org> wrote:
> Author: davidxl
> Date: Thu Dec 3 19:02:24 2015
> New Revision: 254678
>
> URL: http://llvm.org/viewvc/llvm-project?rev=254678&view=rev
> Log:
> [PGO] Unify VP data format between raw and indexed profile (runtime)
>
> With the latest refactoring and code sharing patches landed,
> it is possible to unify the value profile implementation between
> raw and indexed profile. This is the profile runtime part.
>
> Differential Revision: http://reviews.llvm.org/D15057
>
> Added:
> compiler-rt/trunk/test/profile/instrprof-value-prof.c
> Modified:
> compiler-rt/trunk/lib/profile/InstrProfiling.c
>
> Modified: compiler-rt/trunk/lib/profile/InstrProfiling.c
> URL:
> http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/profile/InstrProfiling.c?rev=254678&r1=254677&r2=254678&view=diff
>
> ==============================================================================
> --- compiler-rt/trunk/lib/profile/InstrProfiling.c (original)
> +++ compiler-rt/trunk/lib/profile/InstrProfiling.c Thu Dec 3 19:02:24 2015
> @@ -9,8 +9,19 @@
>
> #include "InstrProfiling.h"
> #include <limits.h>
> +#include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
> +#define INSTR_PROF_VALUE_PROF_DATA
> +#define INSTR_PROF_COMMON_API_IMPL
> +#include "InstrProfData.inc"
> +
> +#define PROF_OOM(Msg) PROF_ERR(Msg ":%s\n", "Out of memory");
> +#define PROF_OOM_RETURN(Msg)
> \
> + {
> \
> + PROF_OOM(Msg)
> \
> + return 0;
> \
> + }
>
> LLVM_LIBRARY_VISIBILITY uint64_t __llvm_profile_get_magic(void) {
> return sizeof(void *) == sizeof(uint64_t) ? (INSTR_PROF_RAW_MAGIC_64)
> @@ -60,20 +71,29 @@ LLVM_LIBRARY_VISIBILITY void __llvm_prof
> }
> }
>
> -/* Total number of value profile data in bytes. */
> -static uint64_t TotalValueDataSize = 0;
> -
> -#ifdef _MIPS_ARCH
> +/* This method is only used in value profiler mock testing. */
> LLVM_LIBRARY_VISIBILITY void
> -__llvm_profile_instrument_target(uint64_t TargetValue, void *Data_,
> - uint32_t CounterIndex) {}
> +__llvm_profile_set_num_value_sites(__llvm_profile_data *Data,
> + uint32_t ValueKind, uint16_t
> NumValueSites) {
> + *((uint16_t *)&Data->NumValueSites[ValueKind]) = NumValueSites;
> +}
> +
> +/* This method is only used in value profiler mock testing. */
> +LLVM_LIBRARY_VISIBILITY const __llvm_profile_data *
> +__llvm_profile_iterate_data(const __llvm_profile_data *Data) {
> + return Data + 1;
> +}
>
> -#else
> +/* This method is only used in value profiler mock testing. */
> +LLVM_LIBRARY_VISIBILITY void *
> +__llvm_get_function_addr(const __llvm_profile_data *Data) {
> + return Data->FunctionPointer;
> +}
>
> /* Allocate an array that holds the pointers to the linked lists of
> * value profile counter nodes. The number of element of the array
> * is the total number of value profile sites instrumented. Returns
> - * 0 if allocation fails.
> + * 0 if allocation fails.
> */
>
> static int allocateValueProfileCounters(__llvm_profile_data *Data) {
> @@ -90,16 +110,27 @@ static int allocateValueProfileCounters(
> free(Mem);
> return 0;
> }
> - /* In the raw format, there will be an value count array preceding
> - * the value profile data. The element type of the array is uint8_t,
> - * and there is one element in array per value site. The element
> - * stores the number of values profiled for the corresponding site.
> - */
> - uint8_t Padding = __llvm_profile_get_num_padding_bytes(NumVSites);
> - __sync_fetch_and_add(&TotalValueDataSize, NumVSites + Padding);
> return 1;
> }
>
> +static void deallocateValueProfileCounters(__llvm_profile_data *Data) {
> + uint64_t NumVSites = 0, I;
> + uint32_t VKI;
> + if (!Data->Values)
> + return;
> + for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI)
> + NumVSites += Data->NumValueSites[VKI];
> + for (I = 0; I < NumVSites; I++) {
> + ValueProfNode *Node = ((ValueProfNode **)Data->Values)[I];
> + while (Node) {
> + ValueProfNode *Next = Node->Next;
> + free(Node);
> + Node = Next;
> + }
> + }
> + free(Data->Values);
> +}
> +
> LLVM_LIBRARY_VISIBILITY void
> __llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
> uint32_t CounterIndex) {
> @@ -149,61 +180,87 @@ __llvm_profile_instrument_target(uint64_
> free(CurrentVNode);
> return;
> }
> - __sync_fetch_and_add(&TotalValueDataSize, Success *
> sizeof(ValueProfNode));
> }
> -#endif
> +
> +/* For multi-threaded programs, while the profile is being dumped, other
> + threads may still be updating the value profile data and creating new
> + value entries. To accommodate this, we need to add extra bytes to the
> + data buffer. The size of the extra space is controlled by an
> environment
> + variable. */
> +static unsigned getVprofExtraBytes() {
> + const char *ExtraStr = getenv("LLVM_VALUE_PROF_BUFFER_EXTRA");
> + if (!ExtraStr || !ExtraStr[0])
> + return 1024;
> + return (unsigned)atoi(ExtraStr);
> +}
>
> LLVM_LIBRARY_VISIBILITY uint64_t
> __llvm_profile_gather_value_data(uint8_t **VDataArray) {
> + size_t S = 0, RealSize = 0, BufferCapacity = 0, Extra = 0;
> + __llvm_profile_data *I;
> + if (!VDataArray)
> + PROF_OOM_RETURN("Failed to write value profile data ");
>
> - if (!VDataArray || 0 == TotalValueDataSize)
> - return 0;
> -
> - uint64_t NumData = TotalValueDataSize;
> - *VDataArray = (uint8_t *)calloc(NumData, sizeof(uint8_t));
> - if (!*VDataArray)
> - return 0;
> -
> - uint8_t *VDataEnd = *VDataArray + NumData;
> - uint8_t *PerSiteCountsHead = *VDataArray;
> const __llvm_profile_data *DataEnd = __llvm_profile_end_data();
> const __llvm_profile_data *DataBegin = __llvm_profile_begin_data();
> - __llvm_profile_data *I;
> +
> + /*
> + * Compute the total Size of the buffer to hold ValueProfData
> + * structures for functions with value profile data.
> + */
> for (I = (__llvm_profile_data *)DataBegin; I != DataEnd; ++I) {
> + ValueProfRuntimeRecord R;
> + /* Extract the value profile data info from the runtime. */
> + if (initializeValueProfRuntimeRecord(&R, I->NumValueSites, I->Values))
> + PROF_OOM_RETURN("Failed to write value profile data ");
> + /* Compute the size of ValueProfData from this runtime record. */
> + if (getNumValueKindsRT(&R) != 0)
> + S += getValueProfDataSizeRT(&R);
> + finalizeValueProfRuntimeRecord(&R);
> + }
> + /* No value sites or no value profile data is collected. */
> + if (!S)
> + return 0;
>
> - uint64_t NumVSites = 0;
> - uint32_t VKI, i;
> + Extra = getVprofExtraBytes();
> + BufferCapacity = S + Extra;
> + *VDataArray = calloc(BufferCapacity, sizeof(uint8_t));
> + if (!*VDataArray)
> + PROF_OOM_RETURN("Failed to write value profile data ");
>
> - if (!I->Values)
> + ValueProfData *VD = (ValueProfData *)(*VDataArray);
> + /*
> + * Extract value profile data and write into ValueProfData structure
> + * one by one. Note that new value profile data added to any value
> + * site (from another thread) after the ValueProfRuntimeRecord is
> + * initialized (when the profile data snapshot is taken) won't be
> + * collected. This is not a problem as those dropped values will have
> + * very low taken counts.
> + */
> + for (I = (__llvm_profile_data *)DataBegin; I != DataEnd; ++I) {
> + ValueProfRuntimeRecord R;
> + if (initializeValueProfRuntimeRecord(&R, I->NumValueSites, I->Values))
> + PROF_OOM_RETURN("Failed to write value profile data ");
> + if (getNumValueKindsRT(&R) == 0)
> continue;
>
> - ValueProfNode **ValueCounters = (ValueProfNode **)I->Values;
> -
> - for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI)
> - NumVSites += I->NumValueSites[VKI];
> - uint8_t Padding = __llvm_profile_get_num_padding_bytes(NumVSites);
> -
> - uint8_t *PerSiteCountPtr = PerSiteCountsHead;
> - InstrProfValueData *VDataPtr =
> - (InstrProfValueData *)(PerSiteCountPtr + NumVSites + Padding);
> -
> - for (i = 0; i < NumVSites; ++i) {
> -
> - ValueProfNode *VNode = ValueCounters[i];
> -
> - uint8_t VDataCount = 0;
> - while (VNode && ((uint8_t *)(VDataPtr + 1) <= VDataEnd)) {
> - *VDataPtr = VNode->VData;
> - VNode = VNode->Next;
> - ++VDataPtr;
> - if (++VDataCount == UCHAR_MAX)
> - break;
> - }
> - *PerSiteCountPtr = VDataCount;
> - ++PerSiteCountPtr;
> + /* Record R has taken a snapshot of the VP data at this point. Newly
> + added VP data for this function will be dropped. */
> + /* Check if there is enough space. */
> + if (BufferCapacity - RealSize < getValueProfDataSizeRT(&R)) {
> + PROF_ERR("Value profile data is dropped :%s \n",
> + "Out of buffer space. Use environment "
> + " LLVM_VALUE_PROF_BUFFER_EXTRA to allocate more");
> + I->Values = 0;
> }
> - I->Values = (void *)PerSiteCountsHead;
> - PerSiteCountsHead = (uint8_t *)VDataPtr;
> +
> + serializeValueProfDataFromRT(&R, VD);
> + deallocateValueProfileCounters(I);
> + I->Values = VD;
> + finalizeValueProfRuntimeRecord(&R);
> + RealSize += VD->TotalSize;
> + VD = (ValueProfData *)((char *)VD + VD->TotalSize);
> }
> - return PerSiteCountsHead - *VDataArray;
> +
> + return RealSize;
> }
>
> Added: compiler-rt/trunk/test/profile/instrprof-value-prof.c
> URL:
> http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/profile/instrprof-value-prof.c?rev=254678&view=auto
>
> ==============================================================================
> --- compiler-rt/trunk/test/profile/instrprof-value-prof.c (added)
> +++ compiler-rt/trunk/test/profile/instrprof-value-prof.c Thu Dec 3
> 19:02:24 2015
> @@ -0,0 +1,183 @@
> +// RUN: %clang_profgen -O2 -o %t %s
> +// RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t 1
> +// RUN: env LLVM_PROFILE_FILE=%t-2.profraw %run %t
> +// RUN: llvm-profdata merge -o %t.profdata %t.profraw
> +// RUN: llvm-profdata merge -o %t-2.profdata %t-2.profraw
> +// RUN: llvm-profdata merge -o %t-merged.profdata %t.profraw %t-2.profdata
> +// RUN: llvm-profdata show --all-functions -ic-targets %t-2.profdata |
> FileCheck %s -check-prefix=NO-VALUE
> +// RUN: llvm-profdata show --all-functions -ic-targets %t.profdata |
> FileCheck %s
> +// Value profile merging currently sorts based on target values --
> this will destroy the order of the targets
> +// in the list, leading to comparison problems. For now just check a small
> subset of the output.
> +// RUN: llvm-profdata show --all-functions -ic-targets
> %t-merged.profdata | FileCheck %s -check-prefix=MERGE
> +
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +typedef struct __llvm_profile_data __llvm_profile_data;
> +const __llvm_profile_data *__llvm_profile_begin_data(void);
> +const __llvm_profile_data *__llvm_profile_end_data(void);
> +void __llvm_profile_set_num_value_sites(__llvm_profile_data *Data,
> + uint32_t ValueKind,
> + uint16_t NumValueSites);
> +__llvm_profile_data *
> +__llvm_profile_iterate_data(const __llvm_profile_data *Data);
> +void *__llvm_get_function_addr(const __llvm_profile_data *Data);
> +void __llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
> + uint32_t CounterIndex);
> +
> +#define DEF_FUNC(x)
> \
> + void x() {}
> +#define DEF_2_FUNCS(x) DEF_FUNC(x##_1) DEF_FUNC(x##_2)
> +#define DEF_4_FUNCS(x) DEF_2_FUNCS(x##_1) DEF_2_FUNCS(x##_2)
> +#define DEF_8_FUNCS(x) DEF_4_FUNCS(x##_1) DEF_4_FUNCS(x##_2)
> +#define DEF_16_FUNCS(x) DEF_8_FUNCS(x##_1) DEF_8_FUNCS(x##_2)
> +#define DEF_32_FUNCS(x) DEF_16_FUNCS(x##_1) DEF_16_FUNCS(x##_2)
> +#define DEF_64_FUNCS(x) DEF_32_FUNCS(x##_1) DEF_32_FUNCS(x##_2)
> +#define DEF_128_FUNCS(x) DEF_64_FUNCS(x##_1) DEF_64_FUNCS(x##_2)
> +
> +#define FUNC_ADDR(x) &x,
> +#define FUNC_2_ADDRS(x) FUNC_ADDR(x##_1) FUNC_ADDR(x##_2)
> +#define FUNC_4_ADDRS(x) FUNC_2_ADDRS(x##_1) FUNC_2_ADDRS(x##_2)
> +#define FUNC_8_ADDRS(x) FUNC_4_ADDRS(x##_1) FUNC_4_ADDRS(x##_2)
> +#define FUNC_16_ADDRS(x) FUNC_8_ADDRS(x##_1) FUNC_8_ADDRS(x##_2)
> +#define FUNC_32_ADDRS(x) FUNC_16_ADDRS(x##_1) FUNC_16_ADDRS(x##_2)
> +#define FUNC_64_ADDRS(x) FUNC_32_ADDRS(x##_1) FUNC_32_ADDRS(x##_2)
> +#define FUNC_128_ADDRS(x) FUNC_64_ADDRS(x##_1) FUNC_64_ADDRS(x##_2)
> +
> +DEF_8_FUNCS(callee)
> +DEF_128_FUNCS(caller)
> +
> +void *CallerAddrs[] = {FUNC_128_ADDRS(caller)};
> +
> +void *CalleeAddrs[] = {FUNC_8_ADDRS(callee)};
> +
> +static int cmpaddr(const void *p1, const void *p2) {
> + void *addr1 = *(void **)p1;
> + void *addr2 = *(void **)p2;
> + return (intptr_t)addr2 - (intptr_t)addr1;
> +}
> +
> +int main(int argc, const char *argv[]) {
> + unsigned S, NS = 0, V, doInstrument = 1;
> + const __llvm_profile_data *Data, *DataEnd;
> +
> + if (argc < 2)
> + doInstrument = 0;
> +
> + qsort(CallerAddrs, sizeof(CallerAddrs) / sizeof(void *), sizeof(void *),
> + cmpaddr);
> +
> + /* We will synthesis value profile data for 128 callers functions.
> + * The number of * value sites. The number values for each value site
> + * ranges from 0 to 8. */
>
This comment looks like it is missing some parts. Can you please update it?
-- Sean Silva
> +
> + Data = __llvm_profile_begin_data();
> + DataEnd = __llvm_profile_end_data();
> +
> + for (; Data < DataEnd; Data = __llvm_profile_iterate_data(Data)) {
> + void *func = __llvm_get_function_addr(Data);
> + if (bsearch(&func, CallerAddrs, sizeof(CallerAddrs) / sizeof(void *),
> + sizeof(void *), cmpaddr)) {
> + __llvm_profile_set_num_value_sites((__llvm_profile_data *)Data,
> + 0 /*IPVK_IndirectCallTarget */,
> NS);
> + if (!doInstrument) {
> + NS++;
> + continue;
> + }
> + for (S = 0; S < NS; S++) {
> + for (V = 0; V < S % 8; V++) {
> + unsigned C;
> + for (C = 0; C < V + 1; C++)
> + __llvm_profile_instrument_target((uint64_t)CalleeAddrs[V],
> + (void *)Data, S);
> + }
> + }
> + NS++;
> + }
> + }
> +}
> +
> +// NO-VALUE: Indirect Call Site Count: 127
> +// NO-VALUE-NEXT: Indirect Target Results:
> +// MERGE: Indirect Call Site Count: 127
> +// MERGE-NEXT: Indirect Target Results:
> +// MERGE-NEXT: [ 1, callee_1_1_1, 1 ]
> +// CHECK: Indirect Call Site Count: 127
> +// CHECK-NEXT: Indirect Target Results:
> +// CHECK-NEXT: [ 1, callee_1_1_1, 1 ]
> +// CHECK-NEXT: [ 2, callee_1_1_1, 1 ]
> +// CHECK-NEXT: [ 2, callee_1_1_2, 2 ]
> +// CHECK-NEXT: [ 3, callee_1_1_1, 1 ]
> +// CHECK-NEXT: [ 3, callee_1_1_2, 2 ]
> +// CHECK-NEXT: [ 3, callee_1_2_1, 3 ]
> +// CHECK-NEXT: [ 4, callee_1_1_1, 1 ]
> +// CHECK-NEXT: [ 4, callee_1_1_2, 2 ]
> +// CHECK-NEXT: [ 4, callee_1_2_1, 3 ]
> +// CHECK-NEXT: [ 4, callee_1_2_2, 4 ]
> +// CHECK-NEXT: [ 5, callee_1_1_1, 1 ]
> +// CHECK-NEXT: [ 5, callee_1_1_2, 2 ]
> +// CHECK-NEXT: [ 5, callee_1_2_1, 3 ]
> +// CHECK-NEXT: [ 5, callee_1_2_2, 4 ]
> +// CHECK-NEXT: [ 5, callee_2_1_1, 5 ]
> +// CHECK-NEXT: [ 6, callee_1_1_1, 1 ]
> +// CHECK-NEXT: [ 6, callee_1_1_2, 2 ]
> +// CHECK-NEXT: [ 6, callee_1_2_1, 3 ]
> +// CHECK-NEXT: [ 6, callee_1_2_2, 4 ]
> +// CHECK-NEXT: [ 6, callee_2_1_1, 5 ]
> +// CHECK-NEXT: [ 6, callee_2_1_2, 6 ]
> +// CHECK-NEXT: [ 7, callee_1_1_1, 1 ]
> +// CHECK-NEXT: [ 7, callee_1_1_2, 2 ]
> +// CHECK-NEXT: [ 7, callee_1_2_1, 3 ]
> +// CHECK-NEXT: [ 7, callee_1_2_2, 4 ]
> +// CHECK-NEXT: [ 7, callee_2_1_1, 5 ]
> +// CHECK-NEXT: [ 7, callee_2_1_2, 6 ]
> +// CHECK-NEXT: [ 7, callee_2_2_1, 7 ]
> +// CHECK-NEXT: [ 9, callee_1_1_1, 1 ]
> +// CHECK-NEXT: [ 10, callee_1_1_1, 1 ]
> +// CHECK-NEXT: [ 10, callee_1_1_2, 2 ]
> +// CHECK-NEXT: [ 11, callee_1_1_1, 1 ]
> +// CHECK-NEXT: [ 11, callee_1_1_2, 2 ]
> +// CHECK-NEXT: [ 11, callee_1_2_1, 3 ]
> +// CHECK-NEXT: [ 12, callee_1_1_1, 1 ]
> +// CHECK-NEXT: [ 12, callee_1_1_2, 2 ]
> +// CHECK-NEXT: [ 12, callee_1_2_1, 3 ]
> +// CHECK-NEXT: [ 12, callee_1_2_2, 4 ]
> +// CHECK-NEXT: [ 13, callee_1_1_1, 1 ]
> +// CHECK-NEXT: [ 13, callee_1_1_2, 2 ]
> +// CHECK-NEXT: [ 13, callee_1_2_1, 3 ]
> +// CHECK-NEXT: [ 13, callee_1_2_2, 4 ]
> +// CHECK-NEXT: [ 13, callee_2_1_1, 5 ]
> +// CHECK-NEXT: [ 14, callee_1_1_1, 1 ]
> +// CHECK-NEXT: [ 14, callee_1_1_2, 2 ]
> +// CHECK-NEXT: [ 14, callee_1_2_1, 3 ]
> +// CHECK-NEXT: [ 14, callee_1_2_2, 4 ]
> +// CHECK-NEXT: [ 14, callee_2_1_1, 5 ]
> +// CHECK-NEXT: [ 14, callee_2_1_2, 6 ]
> +// CHECK-NEXT: [ 15, callee_1_1_1, 1 ]
> +// CHECK-NEXT: [ 15, callee_1_1_2, 2 ]
> +// CHECK-NEXT: [ 15, callee_1_2_1, 3 ]
> +// CHECK-NEXT: [ 15, callee_1_2_2, 4 ]
> +// CHECK-NEXT: [ 15, callee_2_1_1, 5 ]
> +// CHECK-NEXT: [ 15, callee_2_1_2, 6 ]
> +// CHECK-NEXT: [ 15, callee_2_2_1, 7 ]
> +// CHECK-NEXT: [ 17, callee_1_1_1, 1 ]
> +// CHECK-NEXT: [ 18, callee_1_1_1, 1 ]
> +// CHECK-NEXT: [ 18, callee_1_1_2, 2 ]
> +// CHECK-NEXT: [ 19, callee_1_1_1, 1 ]
> +// CHECK-NEXT: [ 19, callee_1_1_2, 2 ]
> +// CHECK-NEXT: [ 19, callee_1_2_1, 3 ]
> +// CHECK-NEXT: [ 20, callee_1_1_1, 1 ]
> +// CHECK-NEXT: [ 20, callee_1_1_2, 2 ]
> +// CHECK-NEXT: [ 20, callee_1_2_1, 3 ]
> +// CHECK-NEXT: [ 20, callee_1_2_2, 4 ]
> +// CHECK-NEXT: [ 21, callee_1_1_1, 1 ]
> +// CHECK-NEXT: [ 21, callee_1_1_2, 2 ]
> +// CHECK-NEXT: [ 21, callee_1_2_1, 3 ]
> +// CHECK-NEXT: [ 21, callee_1_2_2, 4 ]
> +// CHECK-NEXT: [ 21, callee_2_1_1, 5 ]
> +// CHECK-NEXT: [ 22, callee_1_1_1, 1 ]
> +// CHECK-NEXT: [ 22, callee_1_1_2, 2 ]
> +// CHECK-NEXT: [ 22, callee_1_2_1, 3 ]
> +// CHECK-NEXT: [ 22, callee_1_2_2, 4 ]
> +// CHECK-NEXT: [ 22, callee_2_1_1, 5 ]
> +
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160329/19d2c47a/attachment.html>
More information about the llvm-commits
mailing list