[llvm] [CostModel] Remove optional from InstructionCost::getValue() (PR #135596)

Sun Apr 20 22:48:13 PDT 2025

https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/135596

>From d747677d1add0a2ef3fe7c9da3f9857150797b05 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 14 Apr 2025 09:20:48 +0100
Subject: [PATCH 1/2] [CostModel] Remove optional from
 InstructionCost::getValue()

InstructionCost is already an optional value, containing an Invalid state that
can be checked with isValid(). There is little point in returning another
optional from getValue(). Most uses do not make use of it being a
std::optional, dereferencing the value directly (either isValid() has been
checked previously or the Cost is assumed to be valid). The one use that does
rely on the optional, in AMDGPU, called value_or(); it has been replaced by a
new getValueOr(CostType) function.
---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h            |  4 ++--
 llvm/include/llvm/Support/InstructionCost.h         | 13 ++++++++-----
 llvm/include/llvm/Transforms/Utils/UnrollLoop.h     |  2 +-
 llvm/lib/Analysis/CostModel.cpp                     |  4 ++--
 llvm/lib/CodeGen/SelectOptimize.cpp                 |  6 +++---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp     |  2 +-
 .../Target/AArch64/AArch64TargetTransformInfo.cpp   |  2 +-
 llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp        |  3 +--
 .../lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp |  4 ++--
 llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp  |  2 +-
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp         |  2 +-
 llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp  | 13 ++++++-------
 .../Target/SystemZ/SystemZTargetTransformInfo.cpp   |  2 +-
 llvm/lib/Target/X86/X86TargetTransformInfo.cpp      |  6 +++---
 llvm/lib/Transforms/IPO/FunctionSpecialization.cpp  |  4 ++--
 llvm/lib/Transforms/IPO/PartialInlining.cpp         |  2 +-
 llvm/lib/Transforms/Scalar/ConstantHoisting.cpp     |  4 ++--
 llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp     |  2 +-
 llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp   |  2 +-
 llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp       |  6 +++---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp     | 12 ++++++------
 llvm/unittests/Support/InstructionCostTest.cpp      |  6 +++---
 22 files changed, 52 insertions(+), 51 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index f6ec21caa4d72..f2ccdfb673583 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1609,7 +1609,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
 
       // Scale the cost of the load by the fraction of legal instructions that
       // will be used.
-      Cost = divideCeil(UsedInsts.count() * *Cost.getValue(), NumLegalInsts);
+      Cost = divideCeil(UsedInsts.count() * Cost.getValue(), NumLegalInsts);
     }
 
     // Then plus the cost of interleave operation.
@@ -2877,7 +2877,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
           SubTp && SubTp->getElementType() == FTp->getElementType())
         return divideCeil(FTp->getNumElements(), SubTp->getNumElements());
     }
-    return *LT.first.getValue();
+    return LT.first.getValue();
   }
 
   InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *,
diff --git a/llvm/include/llvm/Support/InstructionCost.h b/llvm/include/llvm/Support/InstructionCost.h
index b5af0e0401ef2..634c10fba9afc 100644
--- a/llvm/include/llvm/Support/InstructionCost.h
+++ b/llvm/include/llvm/Support/InstructionCost.h
@@ -20,7 +20,6 @@
 
 #include "llvm/Support/MathExtras.h"
 #include <limits>
-#include <optional>
 
 namespace llvm {
 
@@ -84,10 +83,14 @@ class InstructionCost {
   /// This function is intended to be used as sparingly as possible, since the
   /// class provides the full range of operator support required for arithmetic
   /// and comparisons.
-  std::optional<CostType> getValue() const {
-    if (isValid())
-      return Value;
-    return std::nullopt;
+  CostType getValue() const {
+    assert(isValid());
+    return Value;
+  }
+  CostType getValueOr(CostType Alt) const {
+    if (!isValid())
+      return Alt;
+    return Value;
   }
 
   /// For all of the arithmetic operators provided here any invalid state is
diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
index ed560f6f6e2fa..6759afd8077e9 100644
--- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -143,7 +143,7 @@ class UnrollCostEstimator {
   /// Whether it is legal to unroll this loop.
   bool canUnroll() const;
 
-  uint64_t getRolledLoopSize() const { return *LoopSize.getValue(); }
+  uint64_t getRolledLoopSize() const { return LoopSize.getValue(); }
 
   /// Returns loop size estimation for unrolled loop, given the unrolling
   /// configuration specified by UP.
diff --git a/llvm/lib/Analysis/CostModel.cpp b/llvm/lib/Analysis/CostModel.cpp
index cec0fb6b98dea..6d8bd7d924074 100644
--- a/llvm/lib/Analysis/CostModel.cpp
+++ b/llvm/lib/Analysis/CostModel.cpp
@@ -128,8 +128,8 @@ PreservedAnalyses CostModelPrinterPass::run(Function &F,
       } else {
         InstructionCost Cost =
             getCost(Inst, OutputCostKindToTargetCostKind(CostKind), TTI, TLI);
-        if (auto CostVal = Cost.getValue())
-          OS << "Found an estimated cost of " << *CostVal;
+        if (Cost.isValid())
+          OS << "Found an estimated cost of " << Cost.getValue();
         else
           OS << "Invalid cost";
         OS << " for instruction: " << Inst << "\n";
diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp
index 00148b075134a..13ed8f28d5507 100644
--- a/llvm/lib/CodeGen/SelectOptimize.cpp
+++ b/llvm/lib/CodeGen/SelectOptimize.cpp
@@ -206,7 +206,7 @@ class SelectOptimizeImpl {
           getI()->getOpcode(), I->getType(), TargetTransformInfo::TCK_Latency,
           {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
           {TTI::OK_UniformConstantValue, TTI::OP_PowerOf2});
-      auto TotalCost = Scaled64::get(*Cost.getValue());
+      auto TotalCost = Scaled64::get(Cost.getValue());
       if (auto *OpI = dyn_cast<Instruction>(I->getOperand(1 - CondIdx))) {
         auto It = InstCostMap.find(OpI);
         if (It != InstCostMap.end())
@@ -1380,8 +1380,8 @@ std::optional<uint64_t>
 SelectOptimizeImpl::computeInstCost(const Instruction *I) {
   InstructionCost ICost =
       TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
-  if (auto OC = ICost.getValue())
-    return std::optional<uint64_t>(*OC);
+  if (ICost.isValid())
+    return std::optional<uint64_t>(ICost.getValue());
   return std::nullopt;
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 771eee1b3fecf..0ff681c8dbac6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -28530,7 +28530,7 @@ bool AArch64TargetLowering::shouldLocalize(
         Imm, CI->getType(), TargetTransformInfo::TCK_CodeSize);
     assert(Cost.isValid() && "Expected a valid imm cost");
 
-    unsigned RematCost = *Cost.getValue();
+    unsigned RematCost = Cost.getValue();
     RematCost += AdditionalCost;
     Register Reg = MI.getOperand(0).getReg();
     unsigned MaxUses = maxUses(RematCost);
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 372b70a4b2d64..2794534f71823 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4596,7 +4596,7 @@ static bool isLoopSizeWithinBudget(Loop *L, AArch64TTIImpl &TTI,
   }
 
   if (FinalSize)
-    *FinalSize = *LoopCost.getValue();
+    *FinalSize = LoopCost.getValue();
   return true;
 }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
index dd3bec774ec67..2d3c8f2196274 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
@@ -205,8 +205,7 @@ static CostType calculateFunctionCosts(GetTTIFn GetTTI, Module &M,
             TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
         assert(Cost != InstructionCost::getMax());
         // Assume expensive if we can't tell the cost of an instruction.
-        CostType CostVal =
-            Cost.getValue().value_or(TargetTransformInfo::TCC_Expensive);
+        CostType CostVal = Cost.getValueOr(TargetTransformInfo::TCC_Expensive);
         assert((FnCost + CostVal) >= FnCost && "Overflow!");
         FnCost += CostVal;
       }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 09f7877b13b3a..0e17f60621a77 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -1278,9 +1278,9 @@ static unsigned adjustInliningThresholdUsingCallee(const CallBase *CB,
   // The penalty cost is computed relative to the cost of instructions and does
   // not model any storage costs.
   adjustThreshold += std::max(0, SGPRsInUse - NrOfSGPRUntilSpill) *
-                     *ArgStackCost.getValue() * InlineConstants::getInstrCost();
+                     ArgStackCost.getValue() * InlineConstants::getInstrCost();
   adjustThreshold += std::max(0, VGPRsInUse - NrOfVGPRUntilSpill) *
-                     *ArgStackCost.getValue() * InlineConstants::getInstrCost();
+                     ArgStackCost.getValue() * InlineConstants::getInstrCost();
   return adjustThreshold;
 }
 
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 3a4c2fcad8c83..a1de9094e287a 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -1100,7 +1100,7 @@ InstructionCost PPCTTIImpl::getVPMemoryOpCost(unsigned Opcode, Type *Src,
     float AlignmentProb = ((float)Alignment.value()) / DesiredAlignment.value();
     float MisalignmentProb = 1.0 - AlignmentProb;
     return (MisalignmentProb * P9PipelineFlushEstimate) +
-           (AlignmentProb * *Cost.getValue());
+           (AlignmentProb * Cost.getValue());
   }
 
   // Usually we should not get to this point, but the following is an attempt to
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 98fba9e86e88a..da55b2f79fb78 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2900,7 +2900,7 @@ InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
   bool Log2CostModel =
       Subtarget.getVRGatherCostModel() == llvm::RISCVSubtarget::NLog2N;
   if (Log2CostModel && LMULCost.isValid()) {
-    unsigned Log = Log2_64(*LMULCost.getValue());
+    unsigned Log = Log2_64(LMULCost.getValue());
     if (Log > 0)
       return LMULCost * Log;
   }
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 9b91de36a688a..b2691e9c22d51 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -425,7 +425,7 @@ costShuffleViaVRegSplitting(RISCVTTIImpl &TTI, MVT LegalVT,
   auto *SingleOpTy = FixedVectorType::get(Tp->getElementType(),
                                           LegalVT.getVectorNumElements());
 
-  unsigned E = *NumOfDests.getValue();
+  unsigned E = NumOfDests.getValue();
   unsigned NormalizedVF =
       LegalVT.getVectorNumElements() * std::max(NumOfSrcs, E);
   unsigned NumOfSrcRegs = NormalizedVF / LegalVT.getVectorNumElements();
@@ -651,13 +651,12 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
     if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&
         shouldSplit(Kind) &&
         LT.second.getVectorElementType().getSizeInBits() ==
-        Tp->getElementType()->getPrimitiveSizeInBits() &&
+            Tp->getElementType()->getPrimitiveSizeInBits() &&
         LT.second.getVectorNumElements() <
-        cast<FixedVectorType>(Tp)->getNumElements() &&
-        divideCeil(Mask.size(),
-                   cast<FixedVectorType>(Tp)->getNumElements()) ==
-        static_cast<unsigned>(*LT.first.getValue())) {
-      unsigned NumRegs = *LT.first.getValue();
+            cast<FixedVectorType>(Tp)->getNumElements() &&
+        divideCeil(Mask.size(), cast<FixedVectorType>(Tp)->getNumElements()) ==
+            static_cast<unsigned>(LT.first.getValue())) {
+      unsigned NumRegs = LT.first.getValue();
       unsigned VF = cast<FixedVectorType>(Tp)->getNumElements();
       unsigned SubVF = PowerOf2Ceil(VF / NumRegs);
       auto *SubVecTy = FixedVectorType::get(Tp->getElementType(), SubVF);
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index e0b0099466c52..0b1ed3f48690e 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -379,7 +379,7 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
   // The z13 processor will run out of store tags if too many stores
   // are fed into it too quickly. Therefore make sure there are not
   // too many stores in the resulting unrolled loop.
-  unsigned const NumStoresVal = *NumStores.getValue();
+  unsigned const NumStoresVal = NumStores.getValue();
   unsigned const Max = (NumStoresVal ? (12 / NumStoresVal) : UINT_MAX);
 
   if (HasCall) {
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 41e437a47ba29..39c1899b30366 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1749,7 +1749,7 @@ InstructionCost X86TTIImpl::getShuffleCost(
             getTypeLegalizationCost(
                 FixedVectorType::get(BaseTp->getElementType(), Mask.size()))
                 .first;
-        unsigned E = *NumOfDests.getValue();
+        unsigned E = NumOfDests.getValue();
         unsigned NormalizedVF =
             LegalVT.getVectorNumElements() * std::max(NumOfSrcs, E);
         unsigned NumOfSrcRegs = NormalizedVF / LegalVT.getVectorNumElements();
@@ -4932,7 +4932,7 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(
           (LegalVectorBitWidth % LaneBitWidth) == 0) &&
          "Illegal vector");
 
-  const int NumLegalVectors = *LT.first.getValue();
+  const int NumLegalVectors = LT.first.getValue();
   assert(NumLegalVectors >= 0 && "Negative cost!");
 
   // For insertions, a ISD::BUILD_VECTOR style vector initialization can be much
@@ -6166,7 +6166,7 @@ InstructionCost X86TTIImpl::getGSVectorCost(unsigned Opcode,
   std::pair<InstructionCost, MVT> IdxsLT = getTypeLegalizationCost(IndexVTy);
   std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(SrcVTy);
   InstructionCost::CostType SplitFactor =
-      *std::max(IdxsLT.first, SrcLT.first).getValue();
+      std::max(IdxsLT.first, SrcLT.first).getValue();
   if (SplitFactor > 1) {
     // Handle splitting of vector of pointers
     auto *SplitSrcTy =
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index c13305ce5056d..1034ce9582152 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -662,7 +662,7 @@ FunctionSpecializer::~FunctionSpecializer() {
 /// non-negative, which is true for both TCK_CodeSize and TCK_Latency, and
 /// always Valid.
 static unsigned getCostValue(const Cost &C) {
-  int64_t Value = *C.getValue();
+  int64_t Value = C.getValue();
 
   assert(Value >= 0 && "CodeSize and Latency cannot be negative");
   // It is safe to down cast since we know the arguments cannot be negative and
@@ -713,7 +713,7 @@ bool FunctionSpecializer::run() {
     if (!SpecializeLiteralConstant && !Inserted && !Metrics.isRecursive)
       continue;
 
-    int64_t Sz = *Metrics.NumInsts.getValue();
+    int64_t Sz = Metrics.NumInsts.getValue();
     assert(Sz > 0 && "CodeSize should be positive");
     // It is safe to down cast from int64_t, NumInsts is always positive.
     unsigned FuncSize = static_cast<unsigned>(Sz);
diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp
index e2df95ed23c10..b79fe83b23ec6 100644
--- a/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -1320,7 +1320,7 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
     RelativeToEntryFreq = BranchProbability(0, 1);
 
   BlockFrequency WeightedRcost =
-      BlockFrequency(*NonWeightedRcost.getValue()) * RelativeToEntryFreq;
+      BlockFrequency(NonWeightedRcost.getValue()) * RelativeToEntryFreq;
 
   // The call sequence(s) to the outlined function(s) are larger than the sum of
   // the original outlined region size(s), it does not increase the chances of
diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 40c4c15b7120b..dd4d4efb7fecb 100644
--- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -386,7 +386,7 @@ void ConstantHoistingPass::collectConstantCandidates(
       ConstIntCandVec.push_back(ConstantCandidate(ConstInt));
       Itr->second = ConstIntCandVec.size() - 1;
     }
-    ConstIntCandVec[Itr->second].addUser(Inst, Idx, *Cost.getValue());
+    ConstIntCandVec[Itr->second].addUser(Inst, Idx, Cost.getValue());
     LLVM_DEBUG(if (isa<ConstantInt>(Inst->getOperand(Idx))) dbgs()
                    << "Collect constant " << *ConstInt << " from " << *Inst
                    << " with cost " << Cost << '\n';
@@ -446,7 +446,7 @@ void ConstantHoistingPass::collectConstantCandidates(
         ConstExpr));
     Itr->second = ExprCandVec.size() - 1;
   }
-  ExprCandVec[Itr->second].addUser(Inst, Idx, *Cost.getValue());
+  ExprCandVec[Itr->second].addUser(Inst, Idx, Cost.getValue());
 }
 
 /// Check the operand for instruction Inst at index Idx.
diff --git a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
index 838c7a1b7459d..61863bcf1337a 100644
--- a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -304,7 +304,7 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) {
   if (!Metrics.NumInsts.isValid())
     return MadeChange;
 
-  unsigned LoopSize = *Metrics.NumInsts.getValue();
+  unsigned LoopSize = Metrics.NumInsts.getValue();
   if (!LoopSize)
     LoopSize = 1;
 
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index fd16593b2e874..04719fb70552b 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1535,7 +1535,7 @@ void Cost::RateFormula(const Formula &F,
   C.NumBaseAdds += (F.UnfoldedOffset.isNonZero());
 
   // Accumulate non-free scaling amounts.
-  C.ScaleCost += *getScalingFactorCost(*TTI, LU, F, *L).getValue();
+  C.ScaleCost += getScalingFactorCost(*TTI, LU, F, *L).getValue();
 
   // Tally up the non-zero immediates.
   for (const LSRFixup &Fixup : LU.Fixups) {
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index be9b0e3244b65..d7080d6d76794 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -677,8 +677,8 @@ static std::optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
   LLVM_DEBUG(dbgs() << "Analysis finished:\n"
                     << "UnrolledCost: " << UnrolledCost << ", "
                     << "RolledDynamicCost: " << RolledDynamicCost << "\n");
-  return {{unsigned(*UnrolledCost.getValue()),
-           unsigned(*RolledDynamicCost.getValue())}};
+  return {{unsigned(UnrolledCost.getValue()),
+           unsigned(RolledDynamicCost.getValue())}};
 }
 
 UnrollCostEstimator::UnrollCostEstimator(
@@ -729,7 +729,7 @@ bool UnrollCostEstimator::canUnroll() const {
 uint64_t UnrollCostEstimator::getUnrolledLoopSize(
     const TargetTransformInfo::UnrollingPreferences &UP,
     unsigned CountOverwrite) const {
-  unsigned LS = *LoopSize.getValue();
+  unsigned LS = LoopSize.getValue();
   assert(LS >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
   if (CountOverwrite)
     return static_cast<uint64_t>(LS - UP.BEInsns) * CountOverwrite + UP.BEInsns;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 137f32ded7248..dc530715a6ec2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2002,7 +2002,7 @@ class GeneratedRTChecks {
           InstructionCost NewMemCheckCost = MemCheckCost / BestTripCount;
 
           // Let's ensure the cost is always at least 1.
-          NewMemCheckCost = std::max(*NewMemCheckCost.getValue(),
+          NewMemCheckCost = std::max(NewMemCheckCost.getValue(),
                                      (InstructionCost::CostType)1);
 
           if (BestTripCount > 1)
@@ -5314,7 +5314,7 @@ LoopVectorizationCostModel::selectInterleaveCount(VPlan &Plan, ElementCount VF,
     // to estimate the cost of the loop and interleave until the cost of the
     // loop overhead is about 5% of the cost of the loop.
     unsigned SmallIC = std::min(IC, (unsigned)llvm::bit_floor<uint64_t>(
-                                        SmallLoopCost / *LoopCost.getValue()));
+                                        SmallLoopCost / LoopCost.getValue()));
 
     // Interleave until store/load ports (estimated by max interleave count) are
     // saturated.
@@ -7659,7 +7659,7 @@ InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
   LLVM_DEBUG(dbgs() << "Cost for VF " << VF << ": " << Cost
                     << " (Estimated cost per lane: ");
   if (Cost.isValid()) {
-    double CostPerLane = double(*Cost.getValue()) / EstimatedWidth;
+    double CostPerLane = double(Cost.getValue()) / EstimatedWidth;
     LLVM_DEBUG(dbgs() << format("%.1f", CostPerLane));
   } else /* No point dividing an invalid cost - it will still be invalid */
     LLVM_DEBUG(dbgs() << "Invalid");
@@ -10482,7 +10482,7 @@ static bool isOutsideLoopWorkProfitable(GeneratedRTChecks &Checks,
 
   // The scalar cost should only be 0 when vectorizing with a user specified
   // VF/IC. In those cases, runtime checks should always be generated.
-  uint64_t ScalarC = *VF.ScalarCost.getValue();
+  uint64_t ScalarC = VF.ScalarCost.getValue();
   if (ScalarC == 0)
     return true;
 
@@ -10517,8 +10517,8 @@ static bool isOutsideLoopWorkProfitable(GeneratedRTChecks &Checks,
   // the computations are performed on doubles, not integers and the result
   // is rounded up, hence we get an upper estimate of the TC.
   unsigned IntVF = getEstimatedRuntimeVF(VF.Width, VScale);
-  uint64_t RtC = *TotalCost.getValue();
-  uint64_t Div = ScalarC * IntVF - *VF.Cost.getValue();
+  uint64_t RtC = TotalCost.getValue();
+  uint64_t Div = ScalarC * IntVF - VF.Cost.getValue();
   uint64_t MinTC1 = Div == 0 ? 0 : divideCeil(RtC * IntVF, Div);
 
   // Second, compute a minimum iteration count so that the cost of the
diff --git a/llvm/unittests/Support/InstructionCostTest.cpp b/llvm/unittests/Support/InstructionCostTest.cpp
index 4e2842d8ada97..1bfbeff152b89 100644
--- a/llvm/unittests/Support/InstructionCostTest.cpp
+++ b/llvm/unittests/Support/InstructionCostTest.cpp
@@ -23,7 +23,7 @@ TEST_F(CostTest, DefaultCtor) {
   InstructionCost DefaultCost;
 
   ASSERT_TRUE(DefaultCost.isValid());
-  EXPECT_EQ(*(DefaultCost.getValue()), 0);
+  EXPECT_EQ(DefaultCost.getValue(), 0);
 }
 
 TEST_F(CostTest, Operators) {
@@ -70,8 +70,8 @@ TEST_F(CostTest, Operators) {
   EXPECT_FALSE(TmpCost.isValid());
 
   // Test value extraction
-  EXPECT_EQ(*(VThree.getValue()), 3);
-  EXPECT_EQ(IThreeA.getValue(), std::nullopt);
+  EXPECT_EQ(VThree.getValue(), 3);
+  EXPECT_EQ(IThreeA.getValueOr(10), 10);
 
   EXPECT_EQ(std::min(VThree, VNegTwo), -2);
   EXPECT_EQ(std::max(VThree, VSix), 6);

>From 698c1c831b2b85ca612626bc435b8968aea4302d Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Tue, 15 Apr 2025 11:37:55 +0100
Subject: [PATCH 2/2] Remove getValueOr - move it inline

---
 llvm/include/llvm/Support/InstructionCost.h    | 5 -----
 llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp   | 3 ++-
 llvm/unittests/Support/InstructionCostTest.cpp | 1 -
 3 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/Support/InstructionCost.h b/llvm/include/llvm/Support/InstructionCost.h
index 634c10fba9afc..d5f7457e04748 100644
--- a/llvm/include/llvm/Support/InstructionCost.h
+++ b/llvm/include/llvm/Support/InstructionCost.h
@@ -87,11 +87,6 @@ class InstructionCost {
     assert(isValid());
     return Value;
   }
-  CostType getValueOr(CostType Alt) const {
-    if (!isValid())
-      return Alt;
-    return Value;
-  }
 
   /// For all of the arithmetic operators provided here any invalid state is
   /// perpetuated and cannot be removed. Once a cost becomes invalid it stays
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
index 2d3c8f2196274..1506f02793ba4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
@@ -205,7 +205,8 @@ static CostType calculateFunctionCosts(GetTTIFn GetTTI, Module &M,
             TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
         assert(Cost != InstructionCost::getMax());
         // Assume expensive if we can't tell the cost of an instruction.
-        CostType CostVal = Cost.getValueOr(TargetTransformInfo::TCC_Expensive);
+        CostType CostVal = Cost.isValid() ? Cost.getValue()
+                                          : TargetTransformInfo::TCC_Expensive;
         assert((FnCost + CostVal) >= FnCost && "Overflow!");
         FnCost += CostVal;
       }
diff --git a/llvm/unittests/Support/InstructionCostTest.cpp b/llvm/unittests/Support/InstructionCostTest.cpp
index 1bfbeff152b89..efe838897a684 100644
--- a/llvm/unittests/Support/InstructionCostTest.cpp
+++ b/llvm/unittests/Support/InstructionCostTest.cpp
@@ -71,7 +71,6 @@ TEST_F(CostTest, Operators) {
 
   // Test value extraction
   EXPECT_EQ(VThree.getValue(), 3);
-  EXPECT_EQ(IThreeA.getValueOr(10), 10);
 
   EXPECT_EQ(std::min(VThree, VNegTwo), -2);
   EXPECT_EQ(std::max(VThree, VSix), 6);