[llvm] c6931c2 - [FuncSpec] Only compute Latency bonus when necessary (#113159)

Wed Oct 23 01:05:48 PDT 2024

Author: Hari Limaye
Date: 2024-10-23T09:05:44+01:00
New Revision: c6931c25255c913c5f0a650440b6150dd574f984

URL: https://github.com/llvm/llvm-project/commit/c6931c25255c913c5f0a650440b6150dd574f984
DIFF: https://github.com/llvm/llvm-project/commit/c6931c25255c913c5f0a650440b6150dd574f984.diff

LOG: [FuncSpec] Only compute Latency bonus when necessary (#113159)

Only compute the Latency component of a specialisation's Bonus when
necessary, to avoid unnecessarily computing the Block Frequency
Information for a Function.

Added: 
    

Modified: 
    llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
    llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
    llvm/test/Transforms/SCCP/ipsccp-preserve-pdt.ll
    llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index b001771951e0fe..5920dde9d77dfd 100644

--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -140,41 +140,10 @@ struct Spec {
       : F(F), Sig(S), Score(Score) {}
 };
 
-struct Bonus {
-  unsigned CodeSize = 0;
-  unsigned Latency = 0;
-
-  Bonus() = default;
-
-  Bonus(Cost CodeSize, Cost Latency) {
-    int64_t Sz = *CodeSize.getValue();
-    int64_t Ltc = *Latency.getValue();
-
-    assert(Sz >= 0 && Ltc >= 0 && "CodeSize and Latency cannot be negative");
-    // It is safe to down cast since we know the arguments
-    // cannot be negative and Cost is of type int64_t.
-    this->CodeSize = static_cast<unsigned>(Sz);
-    this->Latency = static_cast<unsigned>(Ltc);
-  }
-
-  Bonus &operator+=(const Bonus RHS) {
-    CodeSize += RHS.CodeSize;
-    Latency += RHS.Latency;
-    return *this;
-  }
-
-  Bonus operator+(const Bonus RHS) const {
-    return Bonus(CodeSize + RHS.CodeSize, Latency + RHS.Latency);
-  }
-
-  bool operator==(const Bonus RHS) const {
-    return CodeSize == RHS.CodeSize && Latency == RHS.Latency;
-  }
-};
-
 class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
+  std::function<BlockFrequencyInfo &(Function &)> GetBFI;
+  Function *F;
   const DataLayout &DL;
-  BlockFrequencyInfo &BFI;
   TargetTransformInfo &TTI;
   SCCPSolver &Solver;
 
@@ -192,17 +161,20 @@ class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
   ConstMap::iterator LastVisited;
 
 public:
-  InstCostVisitor(const DataLayout &DL, BlockFrequencyInfo &BFI,
-                  TargetTransformInfo &TTI, SCCPSolver &Solver)
-      : DL(DL), BFI(BFI), TTI(TTI), Solver(Solver) {}
+  InstCostVisitor(std::function<BlockFrequencyInfo &(Function &)> GetBFI,
+                  Function *F, const DataLayout &DL, TargetTransformInfo &TTI,
+                  SCCPSolver &Solver)
+      : GetBFI(GetBFI), F(F), DL(DL), TTI(TTI), Solver(Solver) {}
 
   bool isBlockExecutable(BasicBlock *BB) {
     return Solver.isBlockExecutable(BB) && !DeadBlocks.contains(BB);
   }
 
-  Bonus getSpecializationBonus(Argument *A, Constant *C);
+  Cost getCodeSizeSavingsForArg(Argument *A, Constant *C);
+
+  Cost getCodeSizeSavingsFromPendingPHIs();
 
-  Bonus getBonusFromPendingPHIs();
+  Cost getLatencySavingsForKnownConstants();
 
 private:
   friend class InstVisitor<InstCostVisitor, Constant *>;
@@ -210,8 +182,8 @@ class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
   static bool canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ,
                                     DenseSet<BasicBlock *> &DeadBlocks);
 
-  Bonus getUserBonus(Instruction *User, Value *Use = nullptr,
-                     Constant *C = nullptr);
+  Cost getCodeSizeSavingsForUser(Instruction *User, Value *Use = nullptr,
+                                 Constant *C = nullptr);
 
   Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList);
   Cost estimateSwitchInst(SwitchInst &I);
@@ -283,9 +255,8 @@ class FunctionSpecializer {
   bool run();
 
   InstCostVisitor getInstCostVisitorFor(Function *F) {
-    auto &BFI = GetBFI(*F);
     auto &TTI = GetTTI(*F);
-    return InstCostVisitor(M.getDataLayout(), BFI, TTI, Solver);
+    return InstCostVisitor(GetBFI, F, M.getDataLayout(), TTI, Solver);
   }
 
 private:

diff  --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 7feebbe420ae53..20249a20a37e41 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -112,7 +112,7 @@ bool InstCostVisitor::canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ,
 Cost InstCostVisitor::estimateBasicBlocks(
                           SmallVectorImpl<BasicBlock *> &WorkList) {
   Cost CodeSize = 0;
-  // Accumulate the instruction cost of each basic block weighted by frequency.
+  // Accumulate the codesize savings of each basic block.
   while (!WorkList.empty()) {
     BasicBlock *BB = WorkList.pop_back_val();
 
@@ -154,37 +154,73 @@ static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) {
   return KnownConstants.lookup(V);
 }
 
-Bonus InstCostVisitor::getBonusFromPendingPHIs() {
-  Bonus B;
+Cost InstCostVisitor::getCodeSizeSavingsFromPendingPHIs() {
+  Cost CodeSize;
   while (!PendingPHIs.empty()) {
     Instruction *Phi = PendingPHIs.pop_back_val();
     // The pending PHIs could have been proven dead by now.
     if (isBlockExecutable(Phi->getParent()))
-      B += getUserBonus(Phi);
+      CodeSize += getCodeSizeSavingsForUser(Phi);
   }
-  return B;
+  return CodeSize;
 }
 
-/// Compute a bonus for replacing argument \p A with constant \p C.
-Bonus InstCostVisitor::getSpecializationBonus(Argument *A, Constant *C) {
+/// Compute the codesize savings for replacing argument \p A with constant \p C.
+Cost InstCostVisitor::getCodeSizeSavingsForArg(Argument *A, Constant *C) {
   LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "
                     << C->getNameOrAsOperand() << "\n");
-  Bonus B;
+  Cost CodeSize;
   for (auto *U : A->users())
     if (auto *UI = dyn_cast<Instruction>(U))
       if (isBlockExecutable(UI->getParent()))
-        B += getUserBonus(UI, A, C);
+        CodeSize += getCodeSizeSavingsForUser(UI, A, C);
 
   LLVM_DEBUG(dbgs() << "FnSpecialization:   Accumulated bonus {CodeSize = "
-                    << B.CodeSize << ", Latency = " << B.Latency
-                    << "} for argument " << *A << "\n");
-  return B;
+                    << CodeSize << "} for argument " << *A << "\n");
+  return CodeSize;
 }
 
-Bonus InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) {
+/// Compute the latency savings from replacing all arguments with constants for
+/// a specialization candidate. As this function computes the latency savings
+/// for all Instructions in KnownConstants at once, it should be called only
+/// after every instruction has been visited, i.e. after:
+///
+/// * getCodeSizeSavingsForArg has been run for every constant argument of a
+///   specialization candidate
+///
+/// * getCodeSizeSavingsFromPendingPHIs has been run
+///
+/// to ensure that the latency savings are calculated for all Instructions we
+/// have visited and found to be constant.
+Cost InstCostVisitor::getLatencySavingsForKnownConstants() {
+  auto &BFI = GetBFI(*F);
+  Cost TotalLatency = 0;
+
+  for (auto Pair : KnownConstants) {
+    Instruction *I = dyn_cast<Instruction>(Pair.first);
+    if (!I)
+      continue;
+
+    uint64_t Weight = BFI.getBlockFreq(I->getParent()).getFrequency() /
+                      BFI.getEntryFreq().getFrequency();
+
+    Cost Latency =
+        Weight * TTI.getInstructionCost(I, TargetTransformInfo::TCK_Latency);
+
+    LLVM_DEBUG(dbgs() << "FnSpecialization:     {Latency = " << Latency
+                      << "} for instruction " << *I << "\n");
+
+    TotalLatency += Latency;
+  }
+
+  return TotalLatency;
+}
+
+Cost InstCostVisitor::getCodeSizeSavingsForUser(Instruction *User, Value *Use,
+                                                Constant *C) {
   // We have already propagated a constant for this user.
   if (KnownConstants.contains(User))
-    return {0, 0};
+    return 0;
 
   // Cache the iterator before visiting.
   LastVisited = Use ? KnownConstants.insert({Use, C}).first
@@ -198,7 +234,7 @@ Bonus InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C)
   } else {
     C = visit(*User);
     if (!C)
-      return {0, 0};
+      return 0;
   }
 
   // Even though it doesn't make sense to bind switch and branch instructions
@@ -208,23 +244,15 @@ Bonus InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C)
 
   CodeSize += TTI.getInstructionCost(User, TargetTransformInfo::TCK_CodeSize);
 
-  uint64_t Weight = BFI.getBlockFreq(User->getParent()).getFrequency() /
-                    BFI.getEntryFreq().getFrequency();
-
-  Cost Latency = Weight *
-      TTI.getInstructionCost(User, TargetTransformInfo::TCK_Latency);
-
   LLVM_DEBUG(dbgs() << "FnSpecialization:     {CodeSize = " << CodeSize
-                    << ", Latency = " << Latency << "} for user "
-                    << *User << "\n");
+                    << "} for user " << *User << "\n");
 
-  Bonus B(CodeSize, Latency);
   for (auto *U : User->users())
     if (auto *UI = dyn_cast<Instruction>(U))
       if (UI != User && isBlockExecutable(UI->getParent()))
-        B += getUserBonus(UI, User, C);
+        CodeSize += getCodeSizeSavingsForUser(UI, User, C);
 
-  return B;
+  return CodeSize;
 }
 
 Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) {
@@ -809,6 +837,18 @@ static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) {
   return Clone;
 }
 
+/// Get the unsigned Value of given Cost object. Assumes the Cost is always
+/// non-negative, which is true for both TCK_CodeSize and TCK_Latency, and
+/// always Valid.
+static unsigned getCostValue(const Cost &C) {
+  int64_t Value = *C.getValue();
+
+  assert(Value >= 0 && "CodeSize and Latency cannot be negative");
+  // It is safe to down cast since we know the arguments cannot be negative and
+  // Cost is of type int64_t.
+  return static_cast<unsigned>(Value);
+}
+
 bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
                                               SmallVectorImpl<Spec> &AllSpecs,
                                               SpecMap &SM) {
@@ -875,48 +915,67 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
       AllSpecs[Index].CallSites.push_back(&CS);
     } else {
       // Calculate the specialisation gain.
-      Bonus B;
+      Cost CodeSize;
       unsigned Score = 0;
       InstCostVisitor Visitor = getInstCostVisitorFor(F);
       for (ArgInfo &A : S.Args) {
-        B += Visitor.getSpecializationBonus(A.Formal, A.Actual);
+        CodeSize += Visitor.getCodeSizeSavingsForArg(A.Formal, A.Actual);
         Score += getInliningBonus(A.Formal, A.Actual);
       }
-      B += Visitor.getBonusFromPendingPHIs();
+      CodeSize += Visitor.getCodeSizeSavingsFromPendingPHIs();
 
-
-      LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization bonus {CodeSize = "
-                        << B.CodeSize << ", Latency = " << B.Latency
-                        << ", Inlining = " << Score << "}\n");
-
-      FunctionGrowth[F] += FuncSize - B.CodeSize;
-
-      auto IsProfitable = [](Bonus &B, unsigned Score, unsigned FuncSize,
-                             unsigned FuncGrowth) -> bool {
+      auto IsProfitable = [&]() -> bool {
         // No check required.
         if (ForceSpecialization)
           return true;
+
+        unsigned CodeSizeSavings = getCostValue(CodeSize);
+        // TODO: We should only accumulate codesize increase of specializations
+        // that are actually created.
+        FunctionGrowth[F] += FuncSize - CodeSizeSavings;
+
+        LLVM_DEBUG(
+            dbgs() << "FnSpecialization: Specialization bonus {Inlining = "
+                   << Score << " (" << (Score * 100 / FuncSize) << "%)}\n");
+
         // Minimum inlining bonus.
         if (Score > MinInliningBonus * FuncSize / 100)
           return true;
+
+        LLVM_DEBUG(
+            dbgs() << "FnSpecialization: Specialization bonus {CodeSize = "
+                   << CodeSizeSavings << " ("
+                   << (CodeSizeSavings * 100 / FuncSize) << "%)}\n");
+
         // Minimum codesize savings.
-        if (B.CodeSize < MinCodeSizeSavings * FuncSize / 100)
+        if (CodeSizeSavings < MinCodeSizeSavings * FuncSize / 100)
           return false;
+
+        // Lazily compute the Latency, to avoid unnecessarily computing BFI.
+        unsigned LatencySavings =
+            getCostValue(Visitor.getLatencySavingsForKnownConstants());
+
+        LLVM_DEBUG(
+            dbgs() << "FnSpecialization: Specialization bonus {Latency = "
+                   << LatencySavings << " ("
+                   << (LatencySavings * 100 / FuncSize) << "%)}\n");
+
         // Minimum latency savings.
-        if (B.Latency < MinLatencySavings * FuncSize / 100)
+        if (LatencySavings < MinLatencySavings * FuncSize / 100)
           return false;
         // Maximum codesize growth.
-        if (FuncGrowth / FuncSize > MaxCodeSizeGrowth)
+        if (FunctionGrowth[F] / FuncSize > MaxCodeSizeGrowth)
           return false;
+
+        Score += std::max(CodeSizeSavings, LatencySavings);
         return true;
       };
 
       // Discard unprofitable specialisations.
-      if (!IsProfitable(B, Score, FuncSize, FunctionGrowth[F]))
+      if (!IsProfitable())
         continue;
 
       // Create a new specialisation entry.
-      Score += std::max(B.CodeSize, B.Latency);
       auto &Spec = AllSpecs.emplace_back(F, S, Score);
       if (CS.getFunction() != F)
         Spec.CallSites.push_back(&CS);

diff  --git a/llvm/test/Transforms/SCCP/ipsccp-preserve-pdt.ll b/llvm/test/Transforms/SCCP/ipsccp-preserve-pdt.ll
index ff57569d127884..f8c8e33dfc2335 100644
--- a/llvm/test/Transforms/SCCP/ipsccp-preserve-pdt.ll
+++ b/llvm/test/Transforms/SCCP/ipsccp-preserve-pdt.ll
@@ -4,25 +4,25 @@
 
 ; This test case is trying to validate that the postdomtree is preserved
 ; correctly by the ipsccp pass. A tricky bug was introduced in commit
-; 1b1232047e83b69561 when PDT would be feched using getCachedAnalysis in order
+; 1b1232047e83b69561 when PDT would be fetched using getCachedAnalysis in order
 ; to setup a DomTreeUpdater (to update the PDT during transformation in order
 ; to preserve the analysis). But given that commit the PDT could end up being
 ; required and calculated via BlockFrequency analysis. So the problem was that
 ; when setting up the DomTreeUpdater we used a nullptr in case PDT wasn't
-; cached at the begininng of IPSCCP, to indicate that no updates where needed
+; cached at the beginning of IPSCCP, to indicate that no updates were needed
 ; for PDT. But then the PDT was calculated, given the input IR, and preserved
 ; using the non-updated state (as the DTU wasn't configured for updating the
 ; PDT).
 
 ; CHECK-NOT: <badref>
 ; CHECK: Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries.
-; CHECK-NEXT:   [1]  <<exit node>> {4294967295,4294967295} [0]
-; CHECK-NEXT:     [2] %for.cond34 {4294967295,4294967295} [1]
-; CHECK-NEXT:       [3] %for.cond16 {4294967295,4294967295} [2]
-; CHECK-NEXT:     [2] %for.body {4294967295,4294967295} [1]
-; CHECK-NEXT:     [2] %if.end4 {4294967295,4294967295} [1]
-; CHECK-NEXT:       [3] %entry {4294967295,4294967295} [2]
-; CHECK-NEXT: Roots: %for.cond34 %for.body
+; CHECK-NEXT:  [1]  <<exit node>> {4294967295,4294967295} [0]
+; CHECK-NEXT:    [2] %for.body {4294967295,4294967295} [1]
+; CHECK-NEXT:    [2] %if.end4 {4294967295,4294967295} [1]
+; CHECK-NEXT:      [3] %entry {4294967295,4294967295} [2]
+; CHECK-NEXT:    [2] %for.cond34 {4294967295,4294967295} [1]
+; CHECK-NEXT:      [3] %for.cond16 {4294967295,4294967295} [2]
+; CHECK-NEXT: Roots: %for.body %for.cond34
 ; CHECK-NEXT: PostDominatorTree for function: bar
 ; CHECK-NOT: <badref>
 

diff  --git a/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp b/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp
index b0ff55489e1762..c8fd366bfac65f 100644
--- a/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp
+++ b/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp
@@ -43,6 +43,7 @@ class FunctionSpecializationTest : public testing::Test {
   FunctionAnalysisManager FAM;
   std::unique_ptr<Module> M;
   std::unique_ptr<SCCPSolver> Solver;
+  SmallVector<Instruction *, 8> KnownConstants;
 
   FunctionSpecializationTest() {
     FAM.registerPass([&] { return TargetLibraryAnalysis(); });
@@ -97,21 +98,29 @@ class FunctionSpecializationTest : public testing::Test {
                                GetAC);
   }
 
-  Bonus getInstCost(Instruction &I, bool SizeOnly = false) {
+  Cost getCodeSizeSavings(Instruction &I, bool HasLatencySavings = true) {
     auto &TTI = FAM.getResult<TargetIRAnalysis>(*I.getFunction());
-    auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*I.getFunction());
 
     Cost CodeSize =
         TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
 
-    Cost Latency =
-        SizeOnly
-            ? 0
-            : BFI.getBlockFreq(I.getParent()).getFrequency() /
-                  BFI.getEntryFreq().getFrequency() *
-                  TTI.getInstructionCost(&I, TargetTransformInfo::TCK_Latency);
+    if (HasLatencySavings)
+      KnownConstants.push_back(&I);
 
-    return {CodeSize, Latency};
+    return CodeSize;
+  }
+
+  Cost getLatencySavings(Function *F) {
+    auto &TTI = FAM.getResult<TargetIRAnalysis>(*F);
+    auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*F);
+
+    Cost Latency = 0;
+    for (const Instruction *I : KnownConstants)
+      Latency += BFI.getBlockFreq(I->getParent()).getFrequency() /
+                 BFI.getEntryFreq().getFrequency() *
+                 TTI.getInstructionCost(I, TargetTransformInfo::TCK_Latency);
+
+    return Latency;
   }
 };
 
@@ -171,25 +180,32 @@ TEST_F(FunctionSpecializationTest, SwitchInst) {
   Instruction &BrLoop = BB2.back();
 
   // mul
-  Bonus Ref = getInstCost(Mul);
-  Bonus Test = Visitor.getSpecializationBonus(F->getArg(0), One);
+  Cost Ref = getCodeSizeSavings(Mul);
+  Cost Test = Visitor.getCodeSizeSavingsForArg(F->getArg(0), One);
   EXPECT_EQ(Test, Ref);
-  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
+  EXPECT_TRUE(Test > 0);
 
   // and + or + add
-  Ref = getInstCost(And) + getInstCost(Or) + getInstCost(Add);
-  Test = Visitor.getSpecializationBonus(F->getArg(1), One);
+  Ref = getCodeSizeSavings(And) + getCodeSizeSavings(Or) +
+        getCodeSizeSavings(Add);
+  Test = Visitor.getCodeSizeSavingsForArg(F->getArg(1), One);
   EXPECT_EQ(Test, Ref);
-  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
+  EXPECT_TRUE(Test > 0);
 
   // switch + sdiv + br + br
-  Ref = getInstCost(Switch) +
-        getInstCost(Sdiv, /*SizeOnly =*/ true) +
-        getInstCost(BrBB2, /*SizeOnly =*/ true) +
-        getInstCost(BrLoop, /*SizeOnly =*/ true);
-  Test = Visitor.getSpecializationBonus(F->getArg(2), One);
+  Ref = getCodeSizeSavings(Switch) +
+        getCodeSizeSavings(Sdiv, /*HasLatencySavings=*/false) +
+        getCodeSizeSavings(BrBB2, /*HasLatencySavings=*/false) +
+        getCodeSizeSavings(BrLoop, /*HasLatencySavings=*/false);
+  Test = Visitor.getCodeSizeSavingsForArg(F->getArg(2), One);
   EXPECT_EQ(Test, Ref);
-  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
+  EXPECT_TRUE(Test > 0);
+
+  // Latency.
+  Ref = getLatencySavings(F);
+  Test = Visitor.getLatencySavingsForKnownConstants();
+  EXPECT_EQ(Test, Ref);
+  EXPECT_TRUE(Test > 0);
 }
 
 TEST_F(FunctionSpecializationTest, BranchInst) {
@@ -238,27 +254,33 @@ TEST_F(FunctionSpecializationTest, BranchInst) {
   Instruction &BrLoop = BB2.front();
 
   // mul
-  Bonus Ref = getInstCost(Mul);
-  Bonus Test = Visitor.getSpecializationBonus(F->getArg(0), One);
+  Cost Ref = getCodeSizeSavings(Mul);
+  Cost Test = Visitor.getCodeSizeSavingsForArg(F->getArg(0), One);
   EXPECT_EQ(Test, Ref);
-  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
+  EXPECT_TRUE(Test > 0);
 
   // add
-  Ref = getInstCost(Add);
-  Test = Visitor.getSpecializationBonus(F->getArg(1), One);
+  Ref = getCodeSizeSavings(Add);
+  Test = Visitor.getCodeSizeSavingsForArg(F->getArg(1), One);
   EXPECT_EQ(Test, Ref);
-  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
+  EXPECT_TRUE(Test > 0);
 
   // branch + sub + br + sdiv + br
-  Ref = getInstCost(Branch) +
-        getInstCost(Sub, /*SizeOnly =*/ true) +
-        getInstCost(BrBB1BB2) +
-        getInstCost(Sdiv, /*SizeOnly =*/ true) +
-        getInstCost(BrBB2, /*SizeOnly =*/ true) +
-        getInstCost(BrLoop, /*SizeOnly =*/ true);
-  Test = Visitor.getSpecializationBonus(F->getArg(2), False);
+  Ref = getCodeSizeSavings(Branch) +
+        getCodeSizeSavings(Sub, /*HasLatencySavings=*/false) +
+        getCodeSizeSavings(BrBB1BB2) +
+        getCodeSizeSavings(Sdiv, /*HasLatencySavings=*/false) +
+        getCodeSizeSavings(BrBB2, /*HasLatencySavings=*/false) +
+        getCodeSizeSavings(BrLoop, /*HasLatencySavings=*/false);
+  Test = Visitor.getCodeSizeSavingsForArg(F->getArg(2), False);
+  EXPECT_EQ(Test, Ref);
+  EXPECT_TRUE(Test > 0);
+
+  // Latency.
+  Ref = getLatencySavings(F);
+  Test = Visitor.getLatencySavingsForKnownConstants();
   EXPECT_EQ(Test, Ref);
-  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
+  EXPECT_TRUE(Test > 0);
 }
 
 TEST_F(FunctionSpecializationTest, SelectInst) {
@@ -279,14 +301,22 @@ TEST_F(FunctionSpecializationTest, SelectInst) {
   Constant *False = ConstantInt::getFalse(M.getContext());
   Instruction &Select = *F->front().begin();
 
-  Bonus Ref = getInstCost(Select);
-  Bonus Test = Visitor.getSpecializationBonus(F->getArg(0), False);
-  EXPECT_TRUE(Test.CodeSize == 0 && Test.Latency == 0);
-  Test = Visitor.getSpecializationBonus(F->getArg(1), One);
-  EXPECT_TRUE(Test.CodeSize == 0 && Test.Latency == 0);
-  Test = Visitor.getSpecializationBonus(F->getArg(2), Zero);
-  EXPECT_EQ(Test, Ref);
-  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
+  Cost RefCodeSize = getCodeSizeSavings(Select);
+  Cost RefLatency = getLatencySavings(F);
+
+  Cost TestCodeSize = Visitor.getCodeSizeSavingsForArg(F->getArg(0), False);
+  EXPECT_TRUE(TestCodeSize == 0);
+  TestCodeSize = Visitor.getCodeSizeSavingsForArg(F->getArg(1), One);
+  EXPECT_TRUE(TestCodeSize == 0);
+  Cost TestLatency = Visitor.getLatencySavingsForKnownConstants();
+  EXPECT_TRUE(TestLatency == 0);
+
+  TestCodeSize = Visitor.getCodeSizeSavingsForArg(F->getArg(2), Zero);
+  EXPECT_EQ(TestCodeSize, RefCodeSize);
+  EXPECT_TRUE(TestCodeSize > 0);
+  TestLatency = Visitor.getLatencySavingsForKnownConstants();
+  EXPECT_EQ(TestLatency, RefLatency);
+  EXPECT_TRUE(TestLatency > 0);
 }
 
 TEST_F(FunctionSpecializationTest, Misc) {
@@ -332,26 +362,32 @@ TEST_F(FunctionSpecializationTest, Misc) {
   Instruction &Smax = *BlockIter++;
 
   // icmp + zext
-  Bonus Ref = getInstCost(Icmp) + getInstCost(Zext);
-  Bonus Test = Visitor.getSpecializationBonus(F->getArg(0), One);
+  Cost Ref = getCodeSizeSavings(Icmp) + getCodeSizeSavings(Zext);
+  Cost Test = Visitor.getCodeSizeSavingsForArg(F->getArg(0), One);
   EXPECT_EQ(Test, Ref);
-  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
+  EXPECT_TRUE(Test > 0);
 
   // select
-  Ref = getInstCost(Select);
-  Test = Visitor.getSpecializationBonus(F->getArg(1), True);
+  Ref = getCodeSizeSavings(Select);
+  Test = Visitor.getCodeSizeSavingsForArg(F->getArg(1), True);
   EXPECT_EQ(Test, Ref);
-  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
+  EXPECT_TRUE(Test > 0);
 
   // gep + load + freeze + smax
-  Ref = getInstCost(Gep) + getInstCost(Load) + getInstCost(Freeze) +
-        getInstCost(Smax);
-  Test = Visitor.getSpecializationBonus(F->getArg(2), GV);
+  Ref = getCodeSizeSavings(Gep) + getCodeSizeSavings(Load) +
+        getCodeSizeSavings(Freeze) + getCodeSizeSavings(Smax);
+  Test = Visitor.getCodeSizeSavingsForArg(F->getArg(2), GV);
   EXPECT_EQ(Test, Ref);
-  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
+  EXPECT_TRUE(Test > 0);
 
-  Test = Visitor.getSpecializationBonus(F->getArg(3), Undef);
-  EXPECT_TRUE(Test.CodeSize == 0 && Test.Latency == 0);
+  Test = Visitor.getCodeSizeSavingsForArg(F->getArg(3), Undef);
+  EXPECT_TRUE(Test == 0);
+
+  // Latency.
+  Ref = getLatencySavings(F);
+  Test = Visitor.getLatencySavingsForKnownConstants();
+  EXPECT_EQ(Test, Ref);
+  EXPECT_TRUE(Test > 0);
 }
 
 TEST_F(FunctionSpecializationTest, PhiNode) {
@@ -401,25 +437,35 @@ TEST_F(FunctionSpecializationTest, PhiNode) {
   Instruction &Icmp = *++BB.begin();
   Instruction &Branch = BB.back();
 
-  Bonus Test = Visitor.getSpecializationBonus(F->getArg(0), One);
-  EXPECT_TRUE(Test.CodeSize == 0 && Test.Latency == 0);
+  Cost Test = Visitor.getCodeSizeSavingsForArg(F->getArg(0), One);
+  EXPECT_TRUE(Test == 0);
 
-  Test = Visitor.getSpecializationBonus(F->getArg(1), One);
-  EXPECT_TRUE(Test.CodeSize == 0 && Test.Latency == 0);
+  Test = Visitor.getCodeSizeSavingsForArg(F->getArg(1), One);
+  EXPECT_TRUE(Test == 0);
+
+  Test = Visitor.getLatencySavingsForKnownConstants();
+  EXPECT_TRUE(Test == 0);
 
   // switch + phi + br
-  Bonus Ref = getInstCost(Switch) +
-              getInstCost(PhiCase2, /*SizeOnly =*/ true) +
-              getInstCost(BrBB, /*SizeOnly =*/ true);
-  Test = Visitor.getSpecializationBonus(F->getArg(2), One);
+  Cost Ref = getCodeSizeSavings(Switch) +
+             getCodeSizeSavings(PhiCase2, /*HasLatencySavings=*/false) +
+             getCodeSizeSavings(BrBB, /*HasLatencySavings=*/false);
+  Test = Visitor.getCodeSizeSavingsForArg(F->getArg(2), One);
   EXPECT_EQ(Test, Ref);
-  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
+  EXPECT_TRUE(Test > 0 && Test > 0);
 
   // phi + phi + add + icmp + branch
-  Ref = getInstCost(PhiBB) + getInstCost(PhiLoop) + getInstCost(Add) +
-        getInstCost(Icmp) + getInstCost(Branch);
-  Test = Visitor.getBonusFromPendingPHIs();
+  Ref = getCodeSizeSavings(PhiBB) + getCodeSizeSavings(PhiLoop) +
+        getCodeSizeSavings(Add) + getCodeSizeSavings(Icmp) +
+        getCodeSizeSavings(Branch);
+  Test = Visitor.getCodeSizeSavingsFromPendingPHIs();
+  EXPECT_EQ(Test, Ref);
+  EXPECT_TRUE(Test > 0);
+
+  // Latency.
+  Ref = getLatencySavings(F);
+  Test = Visitor.getLatencySavingsForKnownConstants();
   EXPECT_EQ(Test, Ref);
-  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
+  EXPECT_TRUE(Test > 0);
 }