[llvm] 239fcda - [LV] NFCI: Do cost comparison on InstructionCost directly.

Sat Jul 10 04:23:06 PDT 2021

Author: Sander de Smalen
Date: 2021-07-10T11:57:16+01:00
New Revision: 239fcda268dc554daf4c2bf20888c72c55518bda

URL: https://github.com/llvm/llvm-project/commit/239fcda268dc554daf4c2bf20888c72c55518bda
DIFF: https://github.com/llvm/llvm-project/commit/239fcda268dc554daf4c2bf20888c72c55518bda.diff

LOG: [LV] NFCI: Do cost comparison on InstructionCost directly.

Instead of performing the isMoreProfitable() operation on
InstructionCost::CostTy the operation is performed on InstructionCost
directly, so that it can handle the case where one of the costs is
Invalid.

This patch also changes the CostTy to be int64_t, so that the type is
wide enough to deal with multiplications with e.g. `unsigned MaxTripCount`.

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D105113

Added: 
    

Modified: 
    llvm/include/llvm/Support/InstructionCost.h
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Support/InstructionCost.h b/llvm/include/llvm/Support/InstructionCost.h
index 30f68597e6f2..7656f45b2031 100644

--- a/llvm/include/llvm/Support/InstructionCost.h
+++ b/llvm/include/llvm/Support/InstructionCost.h
@@ -28,7 +28,7 @@ class raw_ostream;
 
 class InstructionCost {
 public:
-  using CostType = int;
+  using CostType = int64_t;
 
   /// CostState describes the state of a cost.
   enum CostState {

diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 13243f77dd45..94f6aad5d33c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6014,8 +6014,8 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
 
 bool LoopVectorizationCostModel::isMoreProfitable(
     const VectorizationFactor &A, const VectorizationFactor &B) const {
-  InstructionCost::CostType CostA = *A.Cost.getValue();
-  InstructionCost::CostType CostB = *B.Cost.getValue();
+  InstructionCost CostA = A.Cost;
+  InstructionCost CostB = B.Cost;
 
   unsigned MaxTripCount = PSE.getSE()->getSmallConstantMaxTripCount(TheLoop);
 
@@ -6028,8 +6028,8 @@ bool LoopVectorizationCostModel::isMoreProfitable(
     // be PerIterationCost*floor(TC/VF) + Scalar remainder cost, and so is
     // approximated with the per-lane cost below instead of using the tripcount
     // as here.
-    int64_t RTCostA = CostA * divideCeil(MaxTripCount, A.Width.getFixedValue());
-    int64_t RTCostB = CostB * divideCeil(MaxTripCount, B.Width.getFixedValue());
+    auto RTCostA = CostA * divideCeil(MaxTripCount, A.Width.getFixedValue());
+    auto RTCostB = CostB * divideCeil(MaxTripCount, B.Width.getFixedValue());
     return RTCostA < RTCostB;
   }
 
@@ -6064,7 +6064,7 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
     // Ignore scalar width, because the user explicitly wants vectorization.
     // Initialize cost to max so that VF = 2 is, at least, chosen during cost
     // evaluation.
-    ChosenFactor.Cost = std::numeric_limits<InstructionCost::CostType>::max();
+    ChosenFactor.Cost = InstructionCost::getMax();
   }
 
   for (const auto &i : VFCandidates) {
@@ -6600,10 +6600,14 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) {
 
   // A lambda that gets the register usage for the given type and VF.
   const auto &TTICapture = TTI;
-  auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) {
+  auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) -> unsigned {
     if (Ty->isTokenTy() || !VectorType::isValidElementType(Ty))
       return 0;
-    return *TTICapture.getRegUsageForType(VectorType::get(Ty, VF)).getValue();
+    InstructionCost::CostType RegUsage =
+        *TTICapture.getRegUsageForType(VectorType::get(Ty, VF)).getValue();
+    assert(RegUsage >= 0 && RegUsage <= std::numeric_limits<unsigned>::max() &&
+           "Nonsensical values for register usage.");
+    return RegUsage;
   };
 
   for (unsigned int i = 0, s = IdxToInstr.size(); i < s; ++i) {