r203592 - PGO: Scale large counters down to 32-bits

Tue Mar 11 15:03:27 PDT 2014

On Tue, Mar 11, 2014 at 11:18 AM, Duncan P. N. Exon Smith
<dexonsmith at apple.com> wrote:
> Author: dexonsmith
> Date: Tue Mar 11 13:18:10 2014
> New Revision: 203592
>
> URL: http://llvm.org/viewvc/llvm-project?rev=203592&view=rev
> Log:
> PGO: Scale large counters down to 32-bits
>
> PGO counters are 64-bit and branch weights are 32-bit.  Scale them down
> when necessary, instead of just taking the lower 32 bits.
>
> <rdar://problem/16276448>
>
> Added:
>     cfe/trunk/test/Profile/Inputs/c-counter-overflows.profdata
>     cfe/trunk/test/Profile/c-counter-overflows.c
> Modified:
>     cfe/trunk/lib/CodeGen/CodeGenPGO.cpp
>
> Modified: cfe/trunk/lib/CodeGen/CodeGenPGO.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenPGO.cpp?rev=203592&r1=203591&r2=203592&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CodeGenPGO.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CodeGenPGO.cpp Tue Mar 11 13:18:10 2014
> @@ -872,29 +872,59 @@ void CodeGenPGO::destroyRegionCounters()
>      delete RegionCounts;
>  }
>
> +/// \brief Calculate what to divide by to scale weights.
> +///
> +/// Given the maximum weight, calculate a divisor that will scale all the
> +/// weights to strictly less than UINT32_MAX.
> +static uint64_t calculateWeightScale(uint64_t MaxWeight) {
> +  return MaxWeight < UINT32_MAX ? 1 : MaxWeight / UINT32_MAX + 1;
> +}
> +
> +/// \brief Scale an individual branch weight (and add 1).
> +///
> +/// Scale a 64-bit weight down to 32-bits using \c Scale.
> +///
> +/// According to Laplace's Rule of Succession, it is better to compute the
> +/// weight based on the count plus 1, so universally add 1 to the value.
> +///
> +/// \pre \c Scale was calculated by \a calculateWeightScale() with a weight no
> +/// less than \c Weight.
> +static uint32_t scaleBranchWeight(uint64_t Weight, uint64_t Scale) {
> +  assert(Scale && "scale by 0?");
> +  uint64_t Scaled = Weight / Scale + 1;
> +  assert(Scaled <= UINT32_MAX && "overflow 32-bits");
> +  return Scaled;
> +}
> +
>  llvm::MDNode *CodeGenPGO::createBranchWeights(uint64_t TrueCount,
>                                                uint64_t FalseCount) {
> +  // Check for empty weights.
>    if (!TrueCount && !FalseCount)
>      return 0;
>
> +  // Calculate how to scale down to 32-bits.
> +  uint64_t Scale = calculateWeightScale(std::max(TrueCount, FalseCount));
> +
>    llvm::MDBuilder MDHelper(CGM.getLLVMContext());
> -  // TODO: need to scale down to 32-bits
> -  // According to Laplace's Rule of Succession, it is better to compute the
> -  // weight based on the count plus 1.
> -  return MDHelper.createBranchWeights(TrueCount + 1, FalseCount + 1);
> +  return MDHelper.createBranchWeights(scaleBranchWeight(TrueCount, Scale),
> +                                      scaleBranchWeight(FalseCount, Scale));
>  }
>
>  llvm::MDNode *CodeGenPGO::createBranchWeights(ArrayRef<uint64_t> Weights) {
> -  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
> -  // TODO: need to scale down to 32-bits, instead of just truncating.
> -  // According to Laplace's Rule of Succession, it is better to compute the
> -  // weight based on the count plus 1.
> +  // We need at least two elements to create meaningful weights.
> +  if (Weights.size() < 2)
> +    return 0;
> +
> +  // Calculate how to scale down to 32-bits.
> +  uint64_t Scale = calculateWeightScale(*std::max_element(Weights.begin(),
> +                                                          Weights.end()));
> +
>    SmallVector<uint32_t, 16> ScaledWeights;
>    ScaledWeights.reserve(Weights.size());
> -  for (ArrayRef<uint64_t>::iterator WI = Weights.begin(), WE = Weights.end();
> -       WI != WE; ++WI) {
> -    ScaledWeights.push_back(*WI + 1);
> -  }
> +  for (uint64_t W : Weights)
> +    ScaledWeights.push_back(scaleBranchWeight(W, Scale));
> +
> +  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
>    return MDHelper.createBranchWeights(ScaledWeights);
>  }
>
>
> Added: cfe/trunk/test/Profile/Inputs/c-counter-overflows.profdata
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Profile/Inputs/c-counter-overflows.profdata?rev=203592&view=auto
> ==============================================================================
> --- cfe/trunk/test/Profile/Inputs/c-counter-overflows.profdata (added)
> +++ cfe/trunk/test/Profile/Inputs/c-counter-overflows.profdata Tue Mar 11 13:18:10 2014
> @@ -0,0 +1,10 @@
> +main 8
> +1
> +68719476720
> +64424509425
> +68719476720
> +21474836475
> +21474836475
> +21474836475
> +4294967295
> +
>
> Added: cfe/trunk/test/Profile/c-counter-overflows.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Profile/c-counter-overflows.c?rev=203592&view=auto
> ==============================================================================
> --- cfe/trunk/test/Profile/c-counter-overflows.c (added)
> +++ cfe/trunk/test/Profile/c-counter-overflows.c Tue Mar 11 13:18:10 2014
> @@ -0,0 +1,49 @@
> +// Test that big branch weights get scaled down to 32-bits, rather than just
> +// truncated.
> +
> +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name c-counter-overflows.c %s -o - -emit-llvm -fprofile-instr-use=%S/Inputs/c-counter-overflows.profdata | FileCheck %s
> +
> +#include <stdint.h>

Test cases shouldn't include external headers; please explicitly
provide the declarations your test requires. This is causing buildbot
fallout:
http://lab.llvm.org:8011/builders/clang-x86_64-ubuntu-gdb-75/builds/13342/steps/check-all/logs/Clang%3A%3Ac-counter-overflows.c

> +
> +// PGOGEN: @[[MAIN:__llvm_pgo_ctr[0-9]*]] = private global [2 x i64] zeroinitializer
> +int main(int argc, const char *argv[]) {
> +  // Need counts higher than 32-bits.
> +  // CHECK: br {{.*}} !prof ![[FOR:[0-9]+]]
> +  // max   = 0xffffffff0
> +  // scale = 0xffffffff0 / 0xffffffff + 1 = 17
> +  // loop-body: 0xffffffff0 / 17 + 1 = 0xf0f0f0f0 + 1 = 4042322161 => -252645135
> +  // loop-exit: 0x000000001 / 17 + 1 = 0x00000000 + 1 =          1 =>          1
> +  for (uint64_t I = 0; I < 0xffffffff0; ++I) {
> +    // max   = 0xffffffff * 15 = 0xefffffff1
> +    // scale = 0xefffffff1 / 0xffffffff + 1 = 16
> +    // CHECK: br {{.*}} !prof ![[IF:[0-9]+]]
> +    if (I & 0xf) {
> +      // 0xefffffff1 / 16 + 1 = 0xefffffff + 1 = 4026531840 => -268435456
> +    } else {
> +      // 0x0ffffffff / 16 + 1 = 0x0fffffff + 1 =  268435456 =>  268435456
> +    }
> +
> +    // max   = 0xffffffff * 5 = 0x4fffffffb
> +    // scale = 0x4fffffffb / 0xffffffff + 1 = 6
> +    // CHECK: ], !prof ![[SWITCH:[0-9]+]]
> +    switch ((I & 0xf) / 5) {
> +    case 0:
> +      // 0x4fffffffb / 6 + 1 = 0xd5555554 + 1 = 3579139413 => -715827883
> +      break;
> +    case 1:
> +      // 0x4fffffffb / 6 + 1 = 0xd5555554 + 1 = 3579139413 => -715827883
> +      break;
> +    case 2:
> +      // 0x4fffffffb / 6 + 1 = 0xd5555554 + 1 = 3579139413 => -715827883
> +      break;
> +    default:
> +      // 0x0ffffffff / 6 + 1 = 0x2aaaaaaa + 1 =  715827883 =>  715827883
> +      break;
> +    }
> +  }
> +  return 0;
> +}
> +
> +// CHECK-DAG: ![[FOR]] = metadata !{metadata !"branch_weights", i32 -252645135, i32 1}
> +// CHECK-DAG: ![[IF]]  = metadata !{metadata !"branch_weights", i32 -268435456, i32 268435456}
> +// CHECK-DAG: ![[SWITCH]] = metadata !{metadata !"branch_weights", i32 715827883, i32 -715827883, i32 -715827883, i32 -715827883}
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits