[llvm] 20c8f58 - [FuncSpec] Split the specialization bonus into CodeSize and Latency.

Alexandros Lamprineas via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 26 04:07:24 PDT 2023


Author: Alexandros Lamprineas
Date: 2023-07-26T12:03:46+01:00
New Revision: 20c8f58c11d06d4a31fef5033ea5417fb99dfd0e

URL: https://github.com/llvm/llvm-project/commit/20c8f58c11d06d4a31fef5033ea5417fb99dfd0e
DIFF: https://github.com/llvm/llvm-project/commit/20c8f58c11d06d4a31fef5033ea5417fb99dfd0e.diff

LOG: [FuncSpec] Split the specialization bonus into CodeSize and Latency.

Currently we use a combined metric TargetTransformInfo::TCK_SizeAndLatency
when estimating the specialization bonus. This is suboptimal, and in some
cases erroneous. For example we shouldn't be weighting the codesize decrease
attributed to constant propagation by the block frequency of the dead code.
Instead only the latency savings should be weighted by block frequency. The
total codesize savings from all the specialization arguments should be
deducted from the specialization cost.

Differential Revision: https://reviews.llvm.org/D155103

Added: 
    

Modified: 
    llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
    llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
    llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index 4e78d9db024c56..2d82d8da1614a7 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -108,17 +108,49 @@ struct Spec {
   SpecSig Sig;
 
   // Profitability of the specialization.
-  Cost Score;
+  unsigned Score;
 
   // List of call sites, matching this specialization.
   SmallVector<CallBase *> CallSites;
 
-  Spec(Function *F, const SpecSig &S, Cost Score)
+  Spec(Function *F, const SpecSig &S, unsigned Score)
       : F(F), Sig(S), Score(Score) {}
-  Spec(Function *F, const SpecSig &&S, Cost Score)
+  Spec(Function *F, const SpecSig &&S, unsigned Score)
       : F(F), Sig(S), Score(Score) {}
 };
 
+struct Bonus {
+  unsigned CodeSize = 0;
+  unsigned Latency = 0;
+
+  Bonus() = default;
+
+  Bonus(Cost CodeSize, Cost Latency) {
+    int64_t Sz = *CodeSize.getValue();
+    int64_t Ltc = *Latency.getValue();
+
+    assert(Sz >= 0 && Ltc >= 0 && "CodeSize and Latency cannot be negative");
+    // It is safe to down cast since we know the arguments
+    // cannot be negative and Cost is of type int64_t.
+    this->CodeSize = static_cast<unsigned>(Sz);
+    this->Latency = static_cast<unsigned>(Ltc);
+  }
+
+  Bonus &operator+=(const Bonus RHS) {
+    CodeSize += RHS.CodeSize;
+    Latency += RHS.Latency;
+    return *this;
+  }
+
+  Bonus operator+(const Bonus RHS) const {
+    return Bonus(CodeSize + RHS.CodeSize, Latency + RHS.Latency);
+  }
+
+  bool operator==(const Bonus RHS) const {
+    return CodeSize == RHS.CodeSize && Latency == RHS.Latency;
+  }
+};
+
 class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
   const DataLayout &DL;
   BlockFrequencyInfo &BFI;
@@ -143,10 +175,10 @@ class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
                   TargetTransformInfo &TTI, SCCPSolver &Solver)
       : DL(DL), BFI(BFI), TTI(TTI), Solver(Solver) {}
 
-  Cost getUserBonus(Instruction *User, Value *Use = nullptr,
-                    Constant *C = nullptr);
+  Bonus getUserBonus(Instruction *User, Value *Use = nullptr,
+                     Constant *C = nullptr);
 
-  Cost getBonusFromPendingPHIs();
+  Bonus getBonusFromPendingPHIs();
 
 private:
   friend class InstVisitor<InstCostVisitor, Constant *>;
@@ -208,8 +240,8 @@ class FunctionSpecializer {
   }
 
   /// Compute a bonus for replacing argument \p A with constant \p C.
-  Cost getSpecializationBonus(Argument *A, Constant *C,
-                              InstCostVisitor &Visitor);
+  Bonus getSpecializationBonus(Argument *A, Constant *C,
+                               InstCostVisitor &Visitor);
 
 private:
   Constant *getPromotableAlloca(AllocaInst *Alloca, CallInst *Call);
@@ -236,7 +268,7 @@ class FunctionSpecializer {
   /// @param AllSpecs A vector to add potential specializations to.
   /// @param SM  A map for a function's specialisation range
   /// @return True, if any potential specializations were found
-  bool findSpecializations(Function *F, Cost SpecCost,
+  bool findSpecializations(Function *F, unsigned SpecCost,
                            SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM);
 
   bool isCandidateFunction(Function *F);

diff  --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 3d6c501e45968e..b0dc03582950a3 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -101,29 +101,21 @@ static cl::opt<bool> SpecializeLiteralConstant(
     "Enable specialization of functions that take a literal constant as an "
     "argument"));
 
-// Estimates the instruction cost of all the basic blocks in \p WorkList.
-// The successors of such blocks are added to the list as long as they are
-// executable and they have a unique predecessor. \p WorkList represents
-// the basic blocks of a specialization which become dead once we replace
-// instructions that are known to be constants. The aim here is to estimate
-// the combination of size and latency savings in comparison to the non
-// specialized version of the function.
+// Estimates the codesize savings due to dead code after constant propagation.
+// \p WorkList represents the basic blocks of a specialization which will
+// eventually become dead once we replace instructions that are known to be
+// constants. The successors of such blocks are added to the list as long as
+// the \p Solver found they were executable prior to specialization, and only
+// if they have a unique predecessor.
 static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList,
                                 DenseSet<BasicBlock *> &DeadBlocks,
                                 ConstMap &KnownConstants, SCCPSolver &Solver,
-                                BlockFrequencyInfo &BFI,
                                 TargetTransformInfo &TTI) {
-  Cost Bonus = 0;
-
+  Cost CodeSize = 0;
   // Accumulate the instruction cost of each basic block weighted by frequency.
   while (!WorkList.empty()) {
     BasicBlock *BB = WorkList.pop_back_val();
 
-    uint64_t Weight = BFI.getBlockFreq(BB).getFrequency() /
-                      BFI.getEntryFreq();
-    if (!Weight)
-      continue;
-
     // These blocks are considered dead as far as the InstCostVisitor is
     // concerned. They haven't been proven dead yet by the Solver, but
     // may become if we propagate the constant specialization arguments.
@@ -139,11 +131,11 @@ static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList,
       if (KnownConstants.contains(&I))
         continue;
 
-      Bonus += Weight *
-          TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
+      Cost C = TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
 
-      LLVM_DEBUG(dbgs() << "FnSpecialization:     Bonus " << Bonus
-                        << " after user " << I << "\n");
+      LLVM_DEBUG(dbgs() << "FnSpecialization:     CodeSize " << C
+                        << " for user " << I << "\n");
+      CodeSize += C;
     }
 
     // Keep adding dead successors to the list as long as they are
@@ -153,7 +145,7 @@ static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList,
           SuccBB->getUniquePredecessor() == BB)
         WorkList.push_back(SuccBB);
   }
-  return Bonus;
+  return CodeSize;
 }
 
 static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) {
@@ -164,49 +156,51 @@ static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) {
   return nullptr;
 }
 
-Cost InstCostVisitor::getBonusFromPendingPHIs() {
-  Cost Bonus = 0;
+Bonus InstCostVisitor::getBonusFromPendingPHIs() {
+  Bonus B;
   while (!PendingPHIs.empty()) {
     Instruction *Phi = PendingPHIs.pop_back_val();
-    Bonus += getUserBonus(Phi);
+    B += getUserBonus(Phi);
   }
-  return Bonus;
+  return B;
 }
 
-Cost InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) {
+Bonus InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) {
   // Cache the iterator before visiting.
   LastVisited = Use ? KnownConstants.insert({Use, C}).first
                     : KnownConstants.end();
 
-  if (auto *I = dyn_cast<SwitchInst>(User))
-    return estimateSwitchInst(*I);
-
-  if (auto *I = dyn_cast<BranchInst>(User))
-    return estimateBranchInst(*I);
-
-  C = visit(*User);
-  if (!C)
-    return 0;
+  Cost CodeSize = 0;
+  if (auto *I = dyn_cast<SwitchInst>(User)) {
+    CodeSize = estimateSwitchInst(*I);
+  } else if (auto *I = dyn_cast<BranchInst>(User)) {
+    CodeSize = estimateBranchInst(*I);
+  } else {
+    C = visit(*User);
+    if (!C)
+      return {0, 0};
+    KnownConstants.insert({User, C});
+  }
 
-  KnownConstants.insert({User, C});
+  CodeSize += TTI.getInstructionCost(User, TargetTransformInfo::TCK_CodeSize);
 
   uint64_t Weight = BFI.getBlockFreq(User->getParent()).getFrequency() /
                     BFI.getEntryFreq();
-  if (!Weight)
-    return 0;
 
-  Cost Bonus = Weight *
-      TTI.getInstructionCost(User, TargetTransformInfo::TCK_SizeAndLatency);
+  Cost Latency = Weight *
+      TTI.getInstructionCost(User, TargetTransformInfo::TCK_Latency);
 
-  LLVM_DEBUG(dbgs() << "FnSpecialization:     Bonus " << Bonus
-                    << " for user " << *User << "\n");
+  LLVM_DEBUG(dbgs() << "FnSpecialization:     {CodeSize = " << CodeSize
+                    << ", Latency = " << Latency << "} for user "
+                    << *User << "\n");
 
+  Bonus B(CodeSize, Latency);
   for (auto *U : User->users())
     if (auto *UI = dyn_cast<Instruction>(U))
       if (UI != User && Solver.isBlockExecutable(UI->getParent()))
-        Bonus += getUserBonus(UI, User, C);
+        B += getUserBonus(UI, User, C);
 
-  return Bonus;
+  return B;
 }
 
 Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) {
@@ -232,8 +226,7 @@ Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) {
     WorkList.push_back(BB);
   }
 
-  return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, BFI,
-                             TTI);
+  return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, TTI);
 }
 
 Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
@@ -250,8 +243,7 @@ Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
       Succ->getUniquePredecessor() == I.getParent())
     WorkList.push_back(Succ);
 
-  return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, BFI,
-                             TTI);
+  return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, TTI);
 }
 
 Constant *InstCostVisitor::visitPHINode(PHINode &I) {
@@ -572,13 +564,18 @@ bool FunctionSpecializer::run() {
     if (!Inserted && !Metrics.isRecursive && !SpecializeLiteralConstant)
       continue;
 
+    int64_t Sz = *Metrics.NumInsts.getValue();
+    assert(Sz > 0 && "CodeSize should be positive");
+    // It is safe to down cast from int64_t, NumInsts is always positive.
+    unsigned SpecCost = static_cast<unsigned>(Sz);
+
     LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "
-                      << F.getName() << " is " << Metrics.NumInsts << "\n");
+                      << F.getName() << " is " << SpecCost << "\n");
 
     if (Inserted && Metrics.isRecursive)
       promoteConstantStackValues(&F);
 
-    if (!findSpecializations(&F, Metrics.NumInsts, AllSpecs, SM)) {
+    if (!findSpecializations(&F, SpecCost, AllSpecs, SM)) {
       LLVM_DEBUG(
           dbgs() << "FnSpecialization: No possible specializations found for "
                  << F.getName() << "\n");
@@ -713,7 +710,7 @@ static Function *cloneCandidateFunction(Function *F) {
   return Clone;
 }
 
-bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
+bool FunctionSpecializer::findSpecializations(Function *F, unsigned SpecCost,
                                               SmallVectorImpl<Spec> &AllSpecs,
                                               SpecMap &SM) {
   // A mapping from a specialisation signature to the index of the respective
@@ -779,21 +776,22 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
       AllSpecs[Index].CallSites.push_back(&CS);
     } else {
       // Calculate the specialisation gain.
-      Cost Score = 0;
+      Bonus B;
       InstCostVisitor Visitor = getInstCostVisitorFor(F);
       for (ArgInfo &A : S.Args)
-        Score += getSpecializationBonus(A.Formal, A.Actual, Visitor);
-      Score += Visitor.getBonusFromPendingPHIs();
+        B += getSpecializationBonus(A.Formal, A.Actual, Visitor);
+      B += Visitor.getBonusFromPendingPHIs();
 
-      LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization score = "
-                        << Score << "\n");
+      LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization score {CodeSize = "
+                        << B.CodeSize << ", Latency = " << B.Latency
+                        << "}\n");
 
       // Discard unprofitable specialisations.
-      if (!ForceSpecialization && Score <= SpecCost)
+      if (!ForceSpecialization && B.Latency <= SpecCost - B.CodeSize)
         continue;
 
       // Create a new specialisation entry.
-      auto &Spec = AllSpecs.emplace_back(F, S, Score);
+      auto &Spec = AllSpecs.emplace_back(F, S, B.Latency);
       if (CS.getFunction() != F)
         Spec.CallSites.push_back(&CS);
       const unsigned Index = AllSpecs.size() - 1;
@@ -860,19 +858,20 @@ Function *FunctionSpecializer::createSpecialization(Function *F,
 }
 
 /// Compute a bonus for replacing argument \p A with constant \p C.
-Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
+Bonus FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
                                                  InstCostVisitor &Visitor) {
   LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "
                     << C->getNameOrAsOperand() << "\n");
 
-  Cost TotalCost = 0;
+  Bonus B;
   for (auto *U : A->users())
     if (auto *UI = dyn_cast<Instruction>(U))
       if (Solver.isBlockExecutable(UI->getParent()))
-        TotalCost += Visitor.getUserBonus(UI, A, C);
+        B += Visitor.getUserBonus(UI, A, C);
 
-  LLVM_DEBUG(dbgs() << "FnSpecialization:   Accumulated user bonus "
-                    << TotalCost << " for argument " << *A << "\n");
+  LLVM_DEBUG(dbgs() << "FnSpecialization:   Accumulated bonus {CodeSize = "
+                    << B.CodeSize << ", Latency = " << B.Latency
+                    << "} for argument " << *A << "\n");
 
   // The below heuristic is only concerned with exposing inlining
   // opportunities via indirect call promotion. If the argument is not a
@@ -882,7 +881,7 @@ Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
   // while traversing the users of the specialization arguments ?
   Function *CalledFunction = dyn_cast<Function>(C->stripPointerCasts());
   if (!CalledFunction)
-    return TotalCost;
+    return B;
 
   // Get TTI for the called function (used for the inline cost).
   auto &CalleeTTI = (GetTTI)(*CalledFunction);
@@ -892,7 +891,7 @@ Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
   // calls to be promoted to direct calls. If the indirect call promotion
   // would likely enable the called function to be inlined, specializing is a
   // good idea.
-  int Bonus = 0;
+  int InliningBonus = 0;
   for (User *U : A->users()) {
     if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
       continue;
@@ -919,15 +918,15 @@ Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
     // We clamp the bonus for this call to be between zero and the default
     // threshold.
     if (IC.isAlways())
-      Bonus += Params.DefaultThreshold;
+      InliningBonus += Params.DefaultThreshold;
     else if (IC.isVariable() && IC.getCostDelta() > 0)
-      Bonus += IC.getCostDelta();
+      InliningBonus += IC.getCostDelta();
 
-    LLVM_DEBUG(dbgs() << "FnSpecialization:   Inlining bonus " << Bonus
+    LLVM_DEBUG(dbgs() << "FnSpecialization:   Inlining bonus " << InliningBonus
                       << " for user " << *U << "\n");
   }
 
-  return TotalCost + Bonus;
+  return B += {0, InliningBonus};
 }
 
 /// Determine if it is possible to specialise the function for constant values

diff  --git a/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp b/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp
index be6f8fc990b3c7..f924914e0926e4 100644
--- a/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp
+++ b/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp
@@ -81,12 +81,18 @@ class FunctionSpecializationTest : public testing::Test {
                                GetAC);
   }
 
-  Cost getInstCost(Instruction &I) {
+  Bonus getInstCost(Instruction &I, bool SizeOnly = false) {
     auto &TTI = FAM.getResult<TargetIRAnalysis>(*I.getFunction());
     auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*I.getFunction());
 
-    return BFI.getBlockFreq(I.getParent()).getFrequency() / BFI.getEntryFreq() *
-         TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
+    Cost CodeSize =
+        TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
+
+    Cost Latency = SizeOnly ? 0 :
+        BFI.getBlockFreq(I.getParent()).getFrequency() / BFI.getEntryFreq() *
+        TTI.getInstructionCost(&I, TargetTransformInfo::TCK_Latency);
+
+    return {CodeSize, Latency};
   }
 };
 
@@ -130,12 +136,13 @@ TEST_F(FunctionSpecializationTest, SwitchInst) {
   Constant *One = ConstantInt::get(IntegerType::getInt32Ty(M.getContext()), 1);
 
   auto FuncIter = F->begin();
-  ++FuncIter;
+  BasicBlock &Loop = *++FuncIter;
   BasicBlock &Case1 = *++FuncIter;
   BasicBlock &Case2 = *++FuncIter;
   BasicBlock &BB1 = *++FuncIter;
   BasicBlock &BB2 = *++FuncIter;
 
+  Instruction &Switch = Loop.front();
   Instruction &Mul = Case1.front();
   Instruction &And = Case2.front();
   Instruction &Sdiv = *++Case2.begin();
@@ -145,22 +152,25 @@ TEST_F(FunctionSpecializationTest, SwitchInst) {
   Instruction &BrLoop = BB2.back();
 
   // mul
-  Cost Ref = getInstCost(Mul);
-  Cost Bonus = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
-  EXPECT_EQ(Bonus, Ref);
-  EXPECT_TRUE(Bonus > 0);
+  Bonus Ref = getInstCost(Mul);
+  Bonus Test = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
+  EXPECT_EQ(Test, Ref);
+  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
 
   // and + or + add
   Ref = getInstCost(And) + getInstCost(Or) + getInstCost(Add);
-  Bonus = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor);
-  EXPECT_EQ(Bonus, Ref);
-  EXPECT_TRUE(Bonus > 0);
-
-  // sdiv + br + br
-  Ref = getInstCost(Sdiv) + getInstCost(BrBB2) + getInstCost(BrLoop);
-  Bonus = Specializer.getSpecializationBonus(F->getArg(2), One, Visitor);
-  EXPECT_EQ(Bonus, Ref);
-  EXPECT_TRUE(Bonus > 0);
+  Test = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor);
+  EXPECT_EQ(Test, Ref);
+  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
+
+  // switch + sdiv + br + br
+  Ref = getInstCost(Switch) +
+        getInstCost(Sdiv, /*SizeOnly =*/ true) +
+        getInstCost(BrBB2, /*SizeOnly =*/ true) +
+        getInstCost(BrLoop, /*SizeOnly =*/ true);
+  Test = Specializer.getSpecializationBonus(F->getArg(2), One, Visitor);
+  EXPECT_EQ(Test, Ref);
+  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
 }
 
 TEST_F(FunctionSpecializationTest, BranchInst) {
@@ -192,10 +202,11 @@ TEST_F(FunctionSpecializationTest, BranchInst) {
   Constant *False = ConstantInt::getFalse(M.getContext());
 
   auto FuncIter = F->begin();
-  ++FuncIter;
+  BasicBlock &Loop = *++FuncIter;
   BasicBlock &BB0 = *++FuncIter;
   BasicBlock &BB1 = *++FuncIter;
 
+  Instruction &Branch = Loop.front();
   Instruction &Mul = BB0.front();
   Instruction &Sub = *++BB0.begin();
   Instruction &BrBB1 = BB0.back();
@@ -204,23 +215,26 @@ TEST_F(FunctionSpecializationTest, BranchInst) {
   Instruction &BrLoop = BB1.back();
 
   // mul
-  Cost Ref = getInstCost(Mul);
-  Cost Bonus = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
-  EXPECT_EQ(Bonus, Ref);
-  EXPECT_TRUE(Bonus > 0);
+  Bonus Ref = getInstCost(Mul);
+  Bonus Test = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
+  EXPECT_EQ(Test, Ref);
+  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
 
   // add
   Ref = getInstCost(Add);
-  Bonus = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor);
-  EXPECT_EQ(Bonus, Ref);
-  EXPECT_TRUE(Bonus > 0);
-
-  // sub + br + sdiv + br
-  Ref = getInstCost(Sub) + getInstCost(BrBB1) + getInstCost(Sdiv) +
-        getInstCost(BrLoop);
-  Bonus = Specializer.getSpecializationBonus(F->getArg(2), False, Visitor);
-  EXPECT_EQ(Bonus, Ref);
-  EXPECT_TRUE(Bonus > 0);
+  Test = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor);
+  EXPECT_EQ(Test, Ref);
+  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
+
+  // branch + sub + br + sdiv + br
+  Ref = getInstCost(Branch) +
+        getInstCost(Sub, /*SizeOnly =*/ true) +
+        getInstCost(BrBB1, /*SizeOnly =*/ true) +
+        getInstCost(Sdiv, /*SizeOnly =*/ true) +
+        getInstCost(BrLoop, /*SizeOnly =*/ true);
+  Test = Specializer.getSpecializationBonus(F->getArg(2), False, Visitor);
+  EXPECT_EQ(Test, Ref);
+  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
 }
 
 TEST_F(FunctionSpecializationTest, Misc) {
@@ -266,26 +280,26 @@ TEST_F(FunctionSpecializationTest, Misc) {
   Instruction &Smax = *BlockIter++;
 
   // icmp + zext
-  Cost Ref = getInstCost(Icmp) + getInstCost(Zext);
-  Cost Bonus = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
-  EXPECT_EQ(Bonus, Ref);
-  EXPECT_TRUE(Bonus > 0);
+  Bonus Ref = getInstCost(Icmp) + getInstCost(Zext);
+  Bonus Test = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
+  EXPECT_EQ(Test, Ref);
+  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
 
   // select
   Ref = getInstCost(Select);
-  Bonus = Specializer.getSpecializationBonus(F->getArg(1), True, Visitor);
-  EXPECT_EQ(Bonus, Ref);
-  EXPECT_TRUE(Bonus > 0);
+  Test = Specializer.getSpecializationBonus(F->getArg(1), True, Visitor);
+  EXPECT_EQ(Test, Ref);
+  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
 
   // gep + load + freeze + smax
   Ref = getInstCost(Gep) + getInstCost(Load) + getInstCost(Freeze) +
         getInstCost(Smax);
-  Bonus = Specializer.getSpecializationBonus(F->getArg(2), GV, Visitor);
-  EXPECT_EQ(Bonus, Ref);
-  EXPECT_TRUE(Bonus > 0);
+  Test = Specializer.getSpecializationBonus(F->getArg(2), GV, Visitor);
+  EXPECT_EQ(Test, Ref);
+  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
 
-  Bonus = Specializer.getSpecializationBonus(F->getArg(3), Undef, Visitor);
-  EXPECT_TRUE(Bonus == 0);
+  Test = Specializer.getSpecializationBonus(F->getArg(3), Undef, Visitor);
+  EXPECT_TRUE(Test.CodeSize == 0 && Test.Latency == 0);
 }
 
 TEST_F(FunctionSpecializationTest, PhiNode) {
@@ -327,16 +341,17 @@ TEST_F(FunctionSpecializationTest, PhiNode) {
 
   Instruction &Phi = BB.front();
   Instruction &Icmp = *++BB.begin();
+  Instruction &Branch = BB.back();
 
-  Cost Bonus = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor) +
+  Bonus Test = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor) +
                Specializer.getSpecializationBonus(F->getArg(1), One, Visitor) +
                Specializer.getSpecializationBonus(F->getArg(2), One, Visitor);
-  EXPECT_TRUE(Bonus > 0);
+  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
 
-  // phi + icmp
-  Cost Ref = getInstCost(Phi) + getInstCost(Icmp);
-  Bonus = Visitor.getBonusFromPendingPHIs();
-  EXPECT_EQ(Bonus, Ref);
-  EXPECT_TRUE(Bonus > 0);
+  // phi + icmp + branch
+  Bonus Ref = getInstCost(Phi) + getInstCost(Icmp) + getInstCost(Branch);
+  Test = Visitor.getBonusFromPendingPHIs();
+  EXPECT_EQ(Test, Ref);
+  EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
 }
 


        


More information about the llvm-commits mailing list