[llvm] r236750 - Fix information loss in branch probability computation.

Thu May 7 10:22:06 PDT 2015

Author: dnovillo
Date: Thu May  7 12:22:06 2015
New Revision: 236750

URL: http://llvm.org/viewvc/llvm-project?rev=236750&view=rev
Log:
Fix information loss in branch probability computation.

Summary:
This addresses PR 22718. When branch weights are too large, they were
being clamped to the range [1, MaxWeightForBB]. But this clamping is
only applied to edges that go outside the range, so it distorts the
relative branch probabilities.

This patch changes the weight calculation to scale every branch so the
relative probabilities are preserved. The scaling is done differently
now. First, all the branch weights are added up, and if the sum exceeds
32 bits, it computes an integer scale to bring all the weights within
the range.

The patch fixes an existing test that had slightly wrong branch
probabilities due to the previous clamping. It now gets branch weights
scaled accordingly.

Reviewers: dexonsmith

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D9442

Added:
    llvm/trunk/test/Analysis/BranchProbabilityInfo/pr22718.ll
Modified:
    llvm/trunk/lib/Analysis/BranchProbabilityInfo.cpp
    llvm/trunk/test/CodeGen/X86/MachineBranchProb.ll

Modified: llvm/trunk/lib/Analysis/BranchProbabilityInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/BranchProbabilityInfo.cpp?rev=236750&r1=236749&r2=236750&view=diff
==============================================================================

--- llvm/trunk/lib/Analysis/BranchProbabilityInfo.cpp (original)
+++ llvm/trunk/lib/Analysis/BranchProbabilityInfo.cpp Thu May  7 12:22:06 2015
@@ -115,11 +115,6 @@ static const uint32_t NORMAL_WEIGHT = 16
 // Minimum weight of an edge. Please note, that weight is NEVER 0.
 static const uint32_t MIN_WEIGHT = 1;
 
-static uint32_t getMaxWeightFor(BasicBlock *BB) {
-  return UINT32_MAX / BB->getTerminator()->getNumSuccessors();
-}
-
-
 /// \brief Calculate edge weights for successors lead to unreachable.
 ///
 /// Predict that a successor which leads necessarily to an
@@ -185,15 +180,18 @@ bool BranchProbabilityInfo::calcMetadata
   if (!WeightsNode)
     return false;
 
+  // Check that the number of successors is manageable.
+  assert(TI->getNumSuccessors() < UINT32_MAX && "Too many successors");
+
   // Ensure there are weights for all of the successors. Note that the first
   // operand to the metadata node is a name, not a weight.
   if (WeightsNode->getNumOperands() != TI->getNumSuccessors() + 1)
     return false;
 
-  // Build up the final weights that will be used in a temporary buffer, but
-  // don't add them until all weights are present. Each weight value is clamped
-  // to [1, getMaxWeightFor(BB)].
-  uint32_t WeightLimit = getMaxWeightFor(BB);
+  // Build up the final weights that will be used in a temporary buffer.
+  // Compute the sum of all weights to later decide whether they need to
+  // be scaled to fit in 32 bits.
+  uint64_t WeightSum = 0;
   SmallVector<uint32_t, 2> Weights;
   Weights.reserve(TI->getNumSuccessors());
   for (unsigned i = 1, e = WeightsNode->getNumOperands(); i != e; ++i) {
@@ -201,11 +199,26 @@ bool BranchProbabilityInfo::calcMetadata
         mdconst::dyn_extract<ConstantInt>(WeightsNode->getOperand(i));
     if (!Weight)
       return false;
-    Weights.push_back(Weight->getLimitedValue(WeightLimit));
+    assert(Weight->getValue().getActiveBits() <= 32 &&
+           "Too many bits for uint32_t");
+    Weights.push_back(Weight->getZExtValue());
+    WeightSum += Weights.back();
   }
   assert(Weights.size() == TI->getNumSuccessors() && "Checked above");
-  for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
-    setEdgeWeight(BB, i, Weights[i]);
+
+  // If the sum of weights does not fit in 32 bits, scale every weight down
+  // accordingly.
+  uint64_t ScalingFactor =
+      (WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1;
+
+  WeightSum = 0;
+  for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+    uint32_t W = Weights[i] / ScalingFactor;
+    WeightSum += W;
+    setEdgeWeight(BB, i, W);
+  }
+  assert(WeightSum <= UINT32_MAX &&
+         "Expected weights to scale down to 32 bits");
 
   return true;
 }

Added: llvm/trunk/test/Analysis/BranchProbabilityInfo/pr22718.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/BranchProbabilityInfo/pr22718.ll?rev=236750&view=auto
==============================================================================
--- llvm/trunk/test/Analysis/BranchProbabilityInfo/pr22718.ll (added)
+++ llvm/trunk/test/Analysis/BranchProbabilityInfo/pr22718.ll Thu May  7 12:22:06 2015
@@ -0,0 +1,84 @@
+; RUN: opt < %s -analyze -branch-prob | FileCheck %s
+
+; In this test, the else clause is taken about 90% of the time. This was not
+; reflected in the probability computation because the weight is larger than
+; the branch weight cap (about 2 billion).
+;
+; CHECK: edge for.body -> if.then probability is 216661881 / 2166666667 = 9.9
+; CHECK: edge for.body -> if.else probability is 1950004786 / 2166666667 = 90.0
+
+ at y = common global i64 0, align 8
+ at x = common global i64 0, align 8
+ at .str = private unnamed_addr constant [17 x i8] c"x = %lu\0Ay = %lu\0A\00", align 1
+
+; Function Attrs: inlinehint nounwind uwtable
+define i32 @main() #0 {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i64, align 8
+  store i32 0, i32* %retval
+  store i64 0, i64* @y, align 8
+  store i64 0, i64* @x, align 8
+  call void @srand(i32 422304) #3
+  store i64 0, i64* %i, align 8
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i64, i64* %i, align 8
+  %cmp = icmp ult i64 %0, 13000000000
+  br i1 %cmp, label %for.body, label %for.end, !prof !1
+
+for.body:                                         ; preds = %for.cond
+  %call = call i32 @rand() #3
+  %conv = sitofp i32 %call to double
+  %mul = fmul double %conv, 1.000000e+02
+  %div = fdiv double %mul, 0x41E0000000000000
+  %cmp1 = fcmp ogt double %div, 9.000000e+01
+  br i1 %cmp1, label %if.then, label %if.else, !prof !2
+
+if.then:                                          ; preds = %for.body
+  %1 = load i64, i64* @x, align 8
+  %inc = add i64 %1, 1
+  store i64 %inc, i64* @x, align 8
+  br label %if.end
+
+if.else:                                          ; preds = %for.body
+  %2 = load i64, i64* @y, align 8
+  %inc3 = add i64 %2, 1
+  store i64 %inc3, i64* @y, align 8
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end
+  %3 = load i64, i64* %i, align 8
+  %inc4 = add i64 %3, 1
+  store i64 %inc4, i64* %i, align 8
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %4 = load i64, i64* @x, align 8
+  %5 = load i64, i64* @y, align 8
+  %call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str, i32 0, i32 0), i64 %4, i64 %5)
+  ret i32 0
+}
+
+; Function Attrs: nounwind
+declare void @srand(i32) #1
+
+; Function Attrs: nounwind
+declare i32 @rand() #1
+
+declare i32 @printf(i8*, ...) #2
+
+attributes #0 = { inlinehint nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.7.0 (trunk 236218) (llvm/trunk 236235)"}
+!1 = !{!"branch_weights", i32 -1044967295, i32 1}
+!2 = !{!"branch_weights", i32 433323762, i32 -394957723}

Modified: llvm/trunk/test/CodeGen/X86/MachineBranchProb.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/MachineBranchProb.ll?rev=236750&r1=236749&r2=236750&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/MachineBranchProb.ll (original)
+++ llvm/trunk/test/CodeGen/X86/MachineBranchProb.ll Thu May  7 12:22:06 2015
@@ -18,9 +18,9 @@ for.cond2:
   %or.cond = or i1 %tobool, %cmp4
   br i1 %or.cond, label %for.inc20, label %for.inc, !prof !0
 ; CHECK: BB#1: derived from LLVM BB %for.cond2
-; CHECK: Successors according to CFG: BB#3(56008718) BB#4(2203492365)
+; CHECK: Successors according to CFG: BB#3(56008718) BB#4(3615818718)
 ; CHECK: BB#4: derived from LLVM BB %for.cond2
-; CHECK: Successors according to CFG: BB#3(112017436) BB#2(4294967294)
+; CHECK: Successors according to CFG: BB#3(56008718) BB#2(3559810000)
 
 for.inc:                                          ; preds = %for.cond2
   %shl = shl i32 %bit.0, 1