[llvm] 5bfefff - Reland [FuncSpec] Split the specialization bonus into CodeSize and Latency.

Wed Aug 2 04:51:41 PDT 2023

Author: Alexandros Lamprineas
Date: 2023-08-02T12:41:13+01:00
New Revision: 5bfefff1c44fd992b673e1ff9c9f1865f9d81af1

URL: https://github.com/llvm/llvm-project/commit/5bfefff1c44fd992b673e1ff9c9f1865f9d81af1
DIFF: https://github.com/llvm/llvm-project/commit/5bfefff1c44fd992b673e1ff9c9f1865f9d81af1.diff

LOG: Reland [FuncSpec] Split the specialization bonus into CodeSize and Latency.

Currently we use a combined metric TargetTransformInfo::TCK_SizeAndLatency
when estimating the specialization bonus. This is suboptimal, and in some
cases erroneous. For example we shouldn't be weighting the codesize decrease
attributed to constant propagation by the block frequency of the dead code.
Instead only the latency savings should be weighted by block frequency. The
total codesize savings from all the specialization arguments should be
deducted from the specialization cost.

Differential Revision: https://reviews.llvm.org/D155103

Added: 
    

Modified: 
    llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
    llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
    llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index 5efb3cb240ff8d..8bcb928bab3100 100644

--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -105,17 +105,49 @@ struct Spec {
   SpecSig Sig;
 
   // Profitability of the specialization.
-  Cost Score;
+  unsigned Score;
 
   // List of call sites, matching this specialization.
   SmallVector<CallBase *> CallSites;
 
-  Spec(Function *F, const SpecSig &S, Cost Score)
+  Spec(Function *F, const SpecSig &S, unsigned Score)
       : F(F), Sig(S), Score(Score) {}
-  Spec(Function *F, const SpecSig &&S, Cost Score)
+  Spec(Function *F, const SpecSig &&S, unsigned Score)
       : F(F), Sig(S), Score(Score) {}
 };
 
+struct Bonus {
+  unsigned CodeSize = 0;
+  unsigned Latency = 0;
+
+  Bonus() = default;
+
+  Bonus(Cost CodeSize, Cost Latency) {
+    int64_t Sz = *CodeSize.getValue();
+    int64_t Ltc = *Latency.getValue();
+
+    assert(Sz >= 0 && Ltc >= 0 && "CodeSize and Latency cannot be negative");
+    // It is safe to down cast since we know the arguments
+    // cannot be negative and Cost is of type int64_t.
+    this->CodeSize = static_cast<unsigned>(Sz);
+    this->Latency = static_cast<unsigned>(Ltc);
+  }
+
+  Bonus &operator+=(const Bonus RHS) {
+    CodeSize += RHS.CodeSize;
+    Latency += RHS.Latency;
+    return *this;
+  }
+
+  Bonus operator+(const Bonus RHS) const {
+    return Bonus(CodeSize + RHS.CodeSize, Latency + RHS.Latency);
+  }
+
+  bool operator==(const Bonus RHS) const {
+    return CodeSize == RHS.CodeSize && Latency == RHS.Latency;
+  }
+};
+
 class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
   const DataLayout &DL;
   BlockFrequencyInfo &BFI;
@@ -144,10 +176,10 @@ class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
     return Solver.isBlockExecutable(BB) && !DeadBlocks.contains(BB);
   }
 
-  Cost getUserBonus(Instruction *User, Value *Use = nullptr,
-                    Constant *C = nullptr);
+  Bonus getUserBonus(Instruction *User, Value *Use = nullptr,
+                     Constant *C = nullptr);
 
-  Cost getBonusFromPendingPHIs();
+  Bonus getBonusFromPendingPHIs();
 
 private:
   friend class InstVisitor<InstCostVisitor, Constant *>;
@@ -209,8 +241,8 @@ class FunctionSpecializer {
   }
 
   /// Compute a bonus for replacing argument \p A with constant \p C.
-  Cost getSpecializationBonus(Argument *A, Constant *C,
-                              InstCostVisitor &Visitor);
+  Bonus getSpecializationBonus(Argument *A, Constant *C,
+                               InstCostVisitor &Visitor);
 
 private:
   Constant *getPromotableAlloca(AllocaInst *Alloca, CallInst *Call);
@@ -237,7 +269,7 @@ class FunctionSpecializer {
   /// @param AllSpecs A vector to add potential specializations to.
   /// @param SM  A map for a function's specialisation range
   /// @return True, if any potential specializations were found
-  bool findSpecializations(Function *F, Cost SpecCost,
+  bool findSpecializations(Function *F, unsigned SpecCost,
                            SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM);
 
   bool isCandidateFunction(Function *F);

diff  --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index d917342a7d2905..cc02f94086cd1b 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -101,29 +101,21 @@ static cl::opt<bool> SpecializeLiteralConstant(
     "Enable specialization of functions that take a literal constant as an "
     "argument"));
 
-// Estimates the instruction cost of all the basic blocks in \p WorkList.
-// The successors of such blocks are added to the list as long as they are
-// executable and they have a unique predecessor. \p WorkList represents
-// the basic blocks of a specialization which become dead once we replace
-// instructions that are known to be constants. The aim here is to estimate
-// the combination of size and latency savings in comparison to the non
-// specialized version of the function.
+// Estimates the codesize savings due to dead code after constant propagation.
+// \p WorkList represents the basic blocks of a specialization which will
+// eventually become dead once we replace instructions that are known to be
+// constants. The successors of such blocks are added to the list as long as
+// the \p Solver found they were executable prior to specialization, and only
+// if they have a unique predecessor.
 static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList,
                                 DenseSet<BasicBlock *> &DeadBlocks,
                                 ConstMap &KnownConstants, SCCPSolver &Solver,
-                                BlockFrequencyInfo &BFI,
                                 TargetTransformInfo &TTI) {
-  Cost Bonus = 0;
-
+  Cost CodeSize = 0;
   // Accumulate the instruction cost of each basic block weighted by frequency.
   while (!WorkList.empty()) {
     BasicBlock *BB = WorkList.pop_back_val();
 
-    uint64_t Weight = BFI.getBlockFreq(BB).getFrequency() /
-                      BFI.getEntryFreq();
-    if (!Weight)
-      continue;
-
     // These blocks are considered dead as far as the InstCostVisitor
     // is concerned. They haven't been proven dead yet by the Solver,
     // but may become if we propagate the specialization arguments.
@@ -139,11 +131,11 @@ static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList,
       if (KnownConstants.contains(&I))
         continue;
 
-      Bonus += Weight *
-          TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
+      Cost C = TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
 
-      LLVM_DEBUG(dbgs() << "FnSpecialization:     Bonus " << Bonus
-                        << " after user " << I << "\n");
+      LLVM_DEBUG(dbgs() << "FnSpecialization:     CodeSize " << C
+                        << " for user " << I << "\n");
+      CodeSize += C;
     }
 
     // Keep adding dead successors to the list as long as they are
@@ -153,7 +145,7 @@ static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList,
           SuccBB->getUniquePredecessor() == BB)
         WorkList.push_back(SuccBB);
   }
-  return Bonus;
+  return CodeSize;
 }
 
 static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) {
@@ -164,55 +156,57 @@ static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) {
   return nullptr;
 }
 
-Cost InstCostVisitor::getBonusFromPendingPHIs() {
-  Cost Bonus = 0;
+Bonus InstCostVisitor::getBonusFromPendingPHIs() {
+  Bonus B;
   while (!PendingPHIs.empty()) {
     Instruction *Phi = PendingPHIs.pop_back_val();
     // The pending PHIs could have been proven dead by now.
     if (isBlockExecutable(Phi->getParent()))
-      Bonus += getUserBonus(Phi);
+      B += getUserBonus(Phi);
   }
-  return Bonus;
+  return B;
 }
 
-Cost InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) {
+Bonus InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) {
   // We have already propagated a constant for this user.
   if (KnownConstants.contains(User))
-    return 0;
+    return {0, 0};
 
   // Cache the iterator before visiting.
   LastVisited = Use ? KnownConstants.insert({Use, C}).first
                     : KnownConstants.end();
 
-  if (auto *I = dyn_cast<SwitchInst>(User))
-    return estimateSwitchInst(*I);
-
-  if (auto *I = dyn_cast<BranchInst>(User))
-    return estimateBranchInst(*I);
-
-  C = visit(*User);
-  if (!C)
-    return 0;
+  Cost CodeSize = 0;
+  if (auto *I = dyn_cast<SwitchInst>(User)) {
+    CodeSize = estimateSwitchInst(*I);
+  } else if (auto *I = dyn_cast<BranchInst>(User)) {
+    CodeSize = estimateBranchInst(*I);
+  } else {
+    C = visit(*User);
+    if (!C)
+      return {0, 0};
+    KnownConstants.insert({User, C});
+  }
 
-  KnownConstants.insert({User, C});
+  CodeSize += TTI.getInstructionCost(User, TargetTransformInfo::TCK_CodeSize);
 
   uint64_t Weight = BFI.getBlockFreq(User->getParent()).getFrequency() /
                     BFI.getEntryFreq();
-  if (!Weight)
-    return 0;
 
-  Cost Bonus = Weight *
-      TTI.getInstructionCost(User, TargetTransformInfo::TCK_SizeAndLatency);
+  Cost Latency = Weight *
+      TTI.getInstructionCost(User, TargetTransformInfo::TCK_Latency);
 
-  LLVM_DEBUG(dbgs() << "FnSpecialization:     Bonus " << Bonus
-                    << " for user " << *User << "\n");
+  LLVM_DEBUG(dbgs() << "FnSpecialization:     {CodeSize = " << CodeSize
+                    << ", Latency = " << Latency << "} for user "
+                    << *User << "\n");
 
+  Bonus B(CodeSize, Latency);
   for (auto *U : User->users())
     if (auto *UI = dyn_cast<Instruction>(U))
       if (UI != User && isBlockExecutable(UI->getParent()))
-        Bonus += getUserBonus(UI, User, C);
+        B += getUserBonus(UI, User, C);
 
-  return Bonus;
+  return B;
 }
 
 Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) {
@@ -238,8 +232,7 @@ Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) {
     WorkList.push_back(BB);
   }
 
-  return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, BFI,
-                             TTI);
+  return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, TTI);
 }
 
 Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
@@ -256,8 +249,7 @@ Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
       Succ->getUniquePredecessor() == I.getParent())
     WorkList.push_back(Succ);
 
-  return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, BFI,
-                             TTI);
+  return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, TTI);
 }
 
 Constant *InstCostVisitor::visitPHINode(PHINode &I) {
@@ -578,13 +570,18 @@ bool FunctionSpecializer::run() {
     if (!Inserted && !Metrics.isRecursive && !SpecializeLiteralConstant)
       continue;
 
+    int64_t Sz = *Metrics.NumInsts.getValue();
+    assert(Sz > 0 && "CodeSize should be positive");
+    // It is safe to down cast from int64_t, NumInsts is always positive.
+    unsigned SpecCost = static_cast<unsigned>(Sz);
+
     LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "
-                      << F.getName() << " is " << Metrics.NumInsts << "\n");
+                      << F.getName() << " is " << SpecCost << "\n");
 
     if (Inserted && Metrics.isRecursive)
       promoteConstantStackValues(&F);
 
-    if (!findSpecializations(&F, Metrics.NumInsts, AllSpecs, SM)) {
+    if (!findSpecializations(&F, SpecCost, AllSpecs, SM)) {
       LLVM_DEBUG(
           dbgs() << "FnSpecialization: No possible specializations found for "
                  << F.getName() << "\n");
@@ -719,7 +716,7 @@ static Function *cloneCandidateFunction(Function *F) {
   return Clone;
 }
 
-bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
+bool FunctionSpecializer::findSpecializations(Function *F, unsigned SpecCost,
                                               SmallVectorImpl<Spec> &AllSpecs,
                                               SpecMap &SM) {
   // A mapping from a specialisation signature to the index of the respective
@@ -785,21 +782,22 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
       AllSpecs[Index].CallSites.push_back(&CS);
     } else {
       // Calculate the specialisation gain.
-      Cost Score = 0;
+      Bonus B;
       InstCostVisitor Visitor = getInstCostVisitorFor(F);
       for (ArgInfo &A : S.Args)
-        Score += getSpecializationBonus(A.Formal, A.Actual, Visitor);
-      Score += Visitor.getBonusFromPendingPHIs();
+        B += getSpecializationBonus(A.Formal, A.Actual, Visitor);
+      B += Visitor.getBonusFromPendingPHIs();
 
-      LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization score = "
-                        << Score << "\n");
+      LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization score {CodeSize = "
+                        << B.CodeSize << ", Latency = " << B.Latency
+                        << "}\n");
 
       // Discard unprofitable specialisations.
-      if (!ForceSpecialization && Score <= SpecCost)
+      if (!ForceSpecialization && B.Latency <= SpecCost - B.CodeSize)
         continue;
 
       // Create a new specialisation entry.
-      auto &Spec = AllSpecs.emplace_back(F, S, Score);
+      auto &Spec = AllSpecs.emplace_back(F, S, B.Latency);
       if (CS.getFunction() != F)
         Spec.CallSites.push_back(&CS);
       const unsigned Index = AllSpecs.size() - 1;
@@ -866,19 +864,20 @@ Function *FunctionSpecializer::createSpecialization(Function *F,
 }
 
 /// Compute a bonus for replacing argument \p A with constant \p C.
-Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
+Bonus FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
                                                  InstCostVisitor &Visitor) {
   LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "
                     << C->getNameOrAsOperand() << "\n");
 
-  Cost TotalCost = 0;
+  Bonus B;
   for (auto *U : A->users())
     if (auto *UI = dyn_cast<Instruction>(U))
       if (Visitor.isBlockExecutable(UI->getParent()))
-        TotalCost += Visitor.getUserBonus(UI, A, C);
+        B += Visitor.getUserBonus(UI, A, C);
 
-  LLVM_DEBUG(dbgs() << "FnSpecialization:   Accumulated user bonus "
-                    << TotalCost << " for argument " << *A << "\n");
+  LLVM_DEBUG(dbgs() << "FnSpecialization:   Accumulated bonus {CodeSize = "
+                    << B.CodeSize << ", Latency = " << B.Latency
+                    << "} for argument " << *A << "\n");
 
   // The below heuristic is only concerned with exposing inlining
   // opportunities via indirect call promotion. If the argument is not a
@@ -888,7 +887,7 @@ Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
   // while traversing the users of the specialization arguments ?
   Function *CalledFunction = dyn_cast<Function>(C->stripPointerCasts());
   if (!CalledFunction)
-    return TotalCost;
+    return B;
 
   // Get TTI for the called function (used for the inline cost).
   auto &CalleeTTI = (GetTTI)(*CalledFunction);
@@ -898,7 +897,7 @@ Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
   // calls to be promoted to direct calls. If the indirect call promotion
   // would likely enable the called function to be inlined, specializing is a
   // good idea.
-  int Bonus = 0;
+  int InliningBonus = 0;
   for (User *U : A->users()) {
     if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
       continue;
@@ -925,15 +924,15 @@ Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
     // We clamp the bonus for this call to be between zero and the default
     // threshold.
     if (IC.isAlways())
-      Bonus += Params.DefaultThreshold;
+      InliningBonus += Params.DefaultThreshold;
     else if (IC.isVariable() && IC.getCostDelta() > 0)
-      Bonus += IC.getCostDelta();
+      InliningBonus += IC.getCostDelta();
 
-    LLVM_DEBUG(dbgs() << "FnSpecialization:   Inlining bonus " << Bonus
+    LLVM_DEBUG(dbgs() << "FnSpecialization:   Inlining bonus " << InliningBonus
                       << " for user " << *U << "\n");
   }
 
-  return TotalCost + Bonus;
+  return B += {0, InliningBonus};
 }
 
 /// Determine if it is possible to specialise the function for constant values

diff  --git a/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp b/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp
index 6018263cad6586..f9bbbcffbb07d1 100644
--- a/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp
+++ b/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp
@@ -96,12 +96,18 @@ class FunctionSpecializationTest : public testing::Test {
                                GetAC);
   }
 
-  Cost getInstCost(Instruction &I) {
+  Bonus getInstCost(Instruction &I, bool SizeOnly = false) {
     auto &TTI = FAM.getResult<TargetIRAnalysis>(*I.getFunction());
     auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*I.getFunction());
 
-    return BFI.getBlockFreq(I.getParent()).getFrequency() / BFI.getEntryFreq() *
-         TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
+    Cost CodeSize =
+        TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
+
+    Cost Latency = SizeOnly ? 0 :
+        BFI.getBlockFreq(I.getParent()).getFrequency() / BFI.getEntryFreq() *
+        TTI.getInstructionCost(&I, TargetTransformInfo::TCK_Latency);
+
+    return {CodeSize, Latency};
   }
 };
 
@@ -145,12 +151,13 @@ TEST_F(FunctionSpecializationTest, SwitchInst) {
   Constant *One = ConstantInt::get(IntegerType::getInt32Ty(M.getContext()), 1);
 
   auto FuncIter = F->begin();
-  ++FuncIter;
+  BasicBlock &Loop = *++FuncIter;
   BasicBlock &Case1 = *++FuncIter;
   BasicBlock &Case2 = *++FuncIter;
   BasicBlock &BB1 = *++FuncIter;
   BasicBlock &BB2 = *++FuncIter;
 
+  Instruction &Switch = Loop.front();
   Instruction &Mul = Case1.front();
   Instruction &And = Case2.front();
   Instruction &Sdiv = *++Case2.begin();
@@ -160,22 +167,25 @@ TEST_F(FunctionSpecializationTest, SwitchInst) {
   Instruction &BrLoop = BB2.back();
 
   // mul
-  Cost Ref = getInstCost(Mul);
-  Cost Bonus = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
-  EXPECT_EQ(Bonus, Ref);
-  EXPECT_TRUE(Bonus > 0);
+  Bonus Ref = getInstCost(Mul);
+  Bonus Test = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
+  EXPECT_EQ(Test, Ref);
+  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
 
   // and + or + add
   Ref = getInstCost(And) + getInstCost(Or) + getInstCost(Add);
-  Bonus = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor);
-  EXPECT_EQ(Bonus, Ref);
-  EXPECT_TRUE(Bonus > 0);
-
-  // sdiv + br + br
-  Ref = getInstCost(Sdiv) + getInstCost(BrBB2) + getInstCost(BrLoop);
-  Bonus = Specializer.getSpecializationBonus(F->getArg(2), One, Visitor);
-  EXPECT_EQ(Bonus, Ref);
-  EXPECT_TRUE(Bonus > 0);
+  Test = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor);
+  EXPECT_EQ(Test, Ref);
+  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
+
+  // switch + sdiv + br + br
+  Ref = getInstCost(Switch) +
+        getInstCost(Sdiv, /*SizeOnly =*/ true) +
+        getInstCost(BrBB2, /*SizeOnly =*/ true) +
+        getInstCost(BrLoop, /*SizeOnly =*/ true);
+  Test = Specializer.getSpecializationBonus(F->getArg(2), One, Visitor);
+  EXPECT_EQ(Test, Ref);
+  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
 }
 
 TEST_F(FunctionSpecializationTest, BranchInst) {
@@ -207,10 +217,11 @@ TEST_F(FunctionSpecializationTest, BranchInst) {
   Constant *False = ConstantInt::getFalse(M.getContext());
 
   auto FuncIter = F->begin();
-  ++FuncIter;
+  BasicBlock &Loop = *++FuncIter;
   BasicBlock &BB0 = *++FuncIter;
   BasicBlock &BB1 = *++FuncIter;
 
+  Instruction &Branch = Loop.front();
   Instruction &Mul = BB0.front();
   Instruction &Sub = *++BB0.begin();
   Instruction &BrBB1 = BB0.back();
@@ -219,23 +230,26 @@ TEST_F(FunctionSpecializationTest, BranchInst) {
   Instruction &BrLoop = BB1.back();
 
   // mul
-  Cost Ref = getInstCost(Mul);
-  Cost Bonus = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
-  EXPECT_EQ(Bonus, Ref);
-  EXPECT_TRUE(Bonus > 0);
+  Bonus Ref = getInstCost(Mul);
+  Bonus Test = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
+  EXPECT_EQ(Test, Ref);
+  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
 
   // add
   Ref = getInstCost(Add);
-  Bonus = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor);
-  EXPECT_EQ(Bonus, Ref);
-  EXPECT_TRUE(Bonus > 0);
-
-  // sub + br + sdiv + br
-  Ref = getInstCost(Sub) + getInstCost(BrBB1) + getInstCost(Sdiv) +
-        getInstCost(BrLoop);
-  Bonus = Specializer.getSpecializationBonus(F->getArg(2), False, Visitor);
-  EXPECT_EQ(Bonus, Ref);
-  EXPECT_TRUE(Bonus > 0);
+  Test = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor);
+  EXPECT_EQ(Test, Ref);
+  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
+
+  // branch + sub + br + sdiv + br
+  Ref = getInstCost(Branch) +
+        getInstCost(Sub, /*SizeOnly =*/ true) +
+        getInstCost(BrBB1, /*SizeOnly =*/ true) +
+        getInstCost(Sdiv, /*SizeOnly =*/ true) +
+        getInstCost(BrLoop, /*SizeOnly =*/ true);
+  Test = Specializer.getSpecializationBonus(F->getArg(2), False, Visitor);
+  EXPECT_EQ(Test, Ref);
+  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
 }
 
 TEST_F(FunctionSpecializationTest, Misc) {
@@ -281,26 +295,26 @@ TEST_F(FunctionSpecializationTest, Misc) {
   Instruction &Smax = *BlockIter++;
 
   // icmp + zext
-  Cost Ref = getInstCost(Icmp) + getInstCost(Zext);
-  Cost Bonus = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
-  EXPECT_EQ(Bonus, Ref);
-  EXPECT_TRUE(Bonus > 0);
+  Bonus Ref = getInstCost(Icmp) + getInstCost(Zext);
+  Bonus Test = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
+  EXPECT_EQ(Test, Ref);
+  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
 
   // select
   Ref = getInstCost(Select);
-  Bonus = Specializer.getSpecializationBonus(F->getArg(1), True, Visitor);
-  EXPECT_EQ(Bonus, Ref);
-  EXPECT_TRUE(Bonus > 0);
+  Test = Specializer.getSpecializationBonus(F->getArg(1), True, Visitor);
+  EXPECT_EQ(Test, Ref);
+  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
 
   // gep + load + freeze + smax
   Ref = getInstCost(Gep) + getInstCost(Load) + getInstCost(Freeze) +
         getInstCost(Smax);
-  Bonus = Specializer.getSpecializationBonus(F->getArg(2), GV, Visitor);
-  EXPECT_EQ(Bonus, Ref);
-  EXPECT_TRUE(Bonus > 0);
+  Test = Specializer.getSpecializationBonus(F->getArg(2), GV, Visitor);
+  EXPECT_EQ(Test, Ref);
+  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
 
-  Bonus = Specializer.getSpecializationBonus(F->getArg(3), Undef, Visitor);
-  EXPECT_TRUE(Bonus == 0);
+  Test = Specializer.getSpecializationBonus(F->getArg(3), Undef, Visitor);
+  EXPECT_TRUE(Test.CodeSize == 0 && Test.Latency == 0);
 }
 
 TEST_F(FunctionSpecializationTest, PhiNode) {
@@ -342,29 +356,33 @@ TEST_F(FunctionSpecializationTest, PhiNode) {
   BasicBlock &BB = *++FuncIter;
 
   Instruction &PhiLoop = Loop.front();
+  Instruction &Switch = Loop.back();
   Instruction &Add = Case1.front();
   Instruction &PhiCase2 = Case2.front();
   Instruction &BrBB = Case2.back();
   Instruction &PhiBB = BB.front();
   Instruction &Icmp = *++BB.begin();
+  Instruction &Branch = BB.back();
 
-  Cost Bonus = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
-  EXPECT_EQ(Bonus, 0);
+  Bonus Test = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
+  EXPECT_TRUE(Test.CodeSize == 0 && Test.Latency == 0);
 
-  Bonus = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor);
-  EXPECT_EQ(Bonus, 0);
+  Test = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor);
+  EXPECT_TRUE(Test.CodeSize == 0 && Test.Latency == 0);
 
-  // phi + br
-  Cost Ref = getInstCost(PhiCase2) + getInstCost(BrBB);
-  Bonus = Specializer.getSpecializationBonus(F->getArg(2), One, Visitor);
-  EXPECT_EQ(Bonus, Ref);
-  EXPECT_TRUE(Bonus > 0);
+  // switch + phi + br
+  Bonus Ref = getInstCost(Switch) +
+              getInstCost(PhiCase2, /*SizeOnly =*/ true) +
+              getInstCost(BrBB, /*SizeOnly =*/ true);
+  Test = Specializer.getSpecializationBonus(F->getArg(2), One, Visitor);
+  EXPECT_EQ(Test, Ref);
+  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
 
-  // phi + phi + add + icmp
+  // phi + phi + add + icmp + branch
   Ref = getInstCost(PhiBB) + getInstCost(PhiLoop) + getInstCost(Add) +
-        getInstCost(Icmp);
-  Bonus = Visitor.getBonusFromPendingPHIs();
-  EXPECT_EQ(Bonus, Ref);
-  EXPECT_TRUE(Bonus > 0);
+        getInstCost(Icmp) + getInstCost(Branch);
+  Test = Visitor.getBonusFromPendingPHIs();
+  EXPECT_EQ(Test, Ref);
+  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
 }