[llvm] [TTI][Vectorize] Migrate masked/gather-scatter/strided/expand-compress costing (NFCI) (PR #165532)

Shih-Po Hung via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 5 00:05:56 PST 2025


https://github.com/arcbbb updated https://github.com/llvm/llvm-project/pull/165532

>From f9ea255fb3a883926b529988dac1a96997b056ba Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Thu, 30 Oct 2025 18:54:01 -0700
Subject: [PATCH 1/8] Add getMemIntrinsicInstrCost

---
 llvm/include/llvm/Analysis/TargetTransformInfo.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 0f17312b03827..4b87a77ef62f6 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1678,6 +1678,20 @@ class TargetTransformInfo {
   LLVM_ABI InstructionCost getIntrinsicInstrCost(
       const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const;
 
+  /// \returns The cost of memory intrinsic instructions.
+  /// It is used when the IntrinsicInst is not materialized.
+  /// \p DataTy - a vector type of the data to be loaded or stored
+  /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
+  /// \p VariableMask - true when the memory access is predicated with a mask
+  ///                   that is not a compile-time constant
+  /// \p Alignment - alignment of single element
+  /// \p I - the optional original context instruction, if one exists, e.g. the
+  ///        load/store to transform or the call to the gather/scatter intrinsic
+  LLVM_ABI InstructionCost getMemIntrinsicInstrCost(
+      Intrinsic::ID Id, Type *DataTy, const Value *Ptr, bool VariableMask,
+      Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      const Instruction *I = nullptr) const;
+
   /// \returns The cost of Call instructions.
   LLVM_ABI InstructionCost getCallInstrCost(
       Function *F, Type *RetTy, ArrayRef<Type *> Tys,

>From d913f523eb023b561fe73a4a65811343c32bd878 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Mon, 3 Nov 2025 01:02:53 -0800
Subject: [PATCH 2/8] [TTI] Remove getStridedMemoryOpCost from
 Analysis/TargetTransformInfo

---
 .../llvm/Analysis/TargetTransformInfo.h       | 14 -------
 .../llvm/Analysis/TargetTransformInfoImpl.h   |  7 ++++
 llvm/include/llvm/CodeGen/BasicTTIImpl.h      | 41 ++++++++++++++-----
 llvm/lib/Analysis/TargetTransformInfo.cpp     | 18 ++++----
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 28 +++++++------
 5 files changed, 62 insertions(+), 46 deletions(-)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 4b87a77ef62f6..a307f8066254a 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1586,20 +1586,6 @@ class TargetTransformInfo {
       TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
       const Instruction *I = nullptr) const;
 
-  /// \return The cost of strided memory operations.
-  /// \p Opcode - is a type of memory access Load or Store
-  /// \p DataTy - a vector type of the data to be loaded or stored
-  /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
-  /// \p VariableMask - true when the memory access is predicated with a mask
-  ///                   that is not a compile-time constant
-  /// \p Alignment - alignment of single element
-  /// \p I - the optional original context instruction, if one exists, e.g. the
-  ///        load/store to transform or the call to the gather/scatter intrinsic
-  LLVM_ABI InstructionCost getStridedMemoryOpCost(
-      unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
-      Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
-      const Instruction *I = nullptr) const;
-
   /// \return The cost of the interleaved memory operation.
   /// \p Opcode is the memory operation code
   /// \p VecTy is the vector type of the interleaved access.
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index aacb88d2f9684..7c408736edf7e 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -926,6 +926,13 @@ class TargetTransformInfoImplBase {
     return 1;
   }
 
+  virtual InstructionCost
+  getMemIntrinsicInstrCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
+                           bool VariableMask, Align Alignment,
+                           TTI::TargetCostKind CostKind,
+                           const Instruction *I = nullptr) const {
+    return 1;
+  }
   virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                            ArrayRef<Type *> Tys,
                                            TTI::TargetCostKind CostKind) const {
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index e8dbc964a943e..6a31bf555a848 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1966,9 +1966,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       Type *EltTy = cast<VectorType>(Data->getType())->getElementType();
       Align Alignment =
           I->getParamAlign(1).value_or(thisT()->DL.getABITypeAlign(EltTy));
-      return thisT()->getStridedMemoryOpCost(Instruction::Store,
-                                             Data->getType(), Ptr, VarMask,
-                                             Alignment, CostKind, I);
+      return thisT()->getMemIntrinsicInstrCost(IID, Data->getType(), Ptr,
+                                               VarMask, Alignment, CostKind, I);
     }
     case Intrinsic::experimental_vp_strided_load: {
       const Value *Ptr = Args[0];
@@ -1978,8 +1977,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       Type *EltTy = cast<VectorType>(RetTy)->getElementType();
       Align Alignment =
           I->getParamAlign(0).value_or(thisT()->DL.getABITypeAlign(EltTy));
-      return thisT()->getStridedMemoryOpCost(Instruction::Load, RetTy, Ptr,
-                                             VarMask, Alignment, CostKind, I);
+      return thisT()->getMemIntrinsicInstrCost(IID, RetTy, Ptr, VarMask,
+                                               Alignment, CostKind, I);
     }
     case Intrinsic::stepvector: {
       if (isa<ScalableVectorType>(RetTy))
@@ -2420,16 +2419,16 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     case Intrinsic::experimental_vp_strided_store: {
       auto *Ty = cast<VectorType>(ICA.getArgTypes()[0]);
       Align Alignment = thisT()->DL.getABITypeAlign(Ty->getElementType());
-      return thisT()->getStridedMemoryOpCost(
-          Instruction::Store, Ty, /*Ptr=*/nullptr, /*VariableMask=*/true,
-          Alignment, CostKind, ICA.getInst());
+      return thisT()->getMemIntrinsicInstrCost(IID, Ty, /*Ptr=*/nullptr,
+                                               /*VariableMask=*/true, Alignment,
+                                               CostKind, ICA.getInst());
     }
     case Intrinsic::experimental_vp_strided_load: {
       auto *Ty = cast<VectorType>(ICA.getReturnType());
       Align Alignment = thisT()->DL.getABITypeAlign(Ty->getElementType());
-      return thisT()->getStridedMemoryOpCost(
-          Instruction::Load, Ty, /*Ptr=*/nullptr, /*VariableMask=*/true,
-          Alignment, CostKind, ICA.getInst());
+      return thisT()->getMemIntrinsicInstrCost(IID, Ty, /*Ptr=*/nullptr,
+                                               /*VariableMask=*/true, Alignment,
+                                               CostKind, ICA.getInst());
     }
     case Intrinsic::vector_reduce_add:
     case Intrinsic::vector_reduce_mul:
@@ -3017,6 +3016,26 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     return SingleCallCost;
   }
 
+  /// Get memory intrinsic cost based on arguments.
+  InstructionCost getMemIntrinsicInstrCost(Intrinsic::ID Id, Type *DataTy,
+                                           const Value *Ptr, bool VariableMask,
+                                           Align Alignment,
+                                           TTI::TargetCostKind CostKind,
+                                           const Instruction *I) const {
+    switch (Id) {
+    case Intrinsic::experimental_vp_strided_load:
+    case Intrinsic::experimental_vp_strided_store: {
+      unsigned Opcode = (Id == Intrinsic::experimental_vp_strided_load)
+                            ? Instruction::Load
+                            : Instruction::Store;
+      return thisT()->getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
+                                             Alignment, CostKind, I);
+    }
+    default:
+      llvm_unreachable("unexpected intrinsic");
+    }
+  }
+
   /// Compute a cost of the given call instruction.
   ///
   /// Compute the cost of calling function F with return type RetTy and
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 0426ac7e62fab..316ed4a966fd4 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1210,15 +1210,6 @@ InstructionCost TargetTransformInfo::getExpandCompressMemoryOpCost(
   return Cost;
 }
 
-InstructionCost TargetTransformInfo::getStridedMemoryOpCost(
-    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
-    Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
-  InstructionCost Cost = TTIImpl->getStridedMemoryOpCost(
-      Opcode, DataTy, Ptr, VariableMask, Alignment, CostKind, I);
-  assert(Cost >= 0 && "TTI should not produce negative costs!");
-  return Cost;
-}
-
 InstructionCost TargetTransformInfo::getInterleavedMemoryOpCost(
     unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
     Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
@@ -1238,6 +1229,15 @@ TargetTransformInfo::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
   return Cost;
 }
 
+InstructionCost TargetTransformInfo::getMemIntrinsicInstrCost(
+    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
+    Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
+  InstructionCost Cost = TTIImpl->getMemIntrinsicInstrCost(
+      Opcode, DataTy, Ptr, VariableMask, Alignment, CostKind, I);
+  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  return Cost;
+}
+
 InstructionCost
 TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
                                       ArrayRef<Type *> Tys,
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index bf3f52c51b64c..db50dbd68ed82 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7241,10 +7241,10 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
               VectorGEPCost;
           break;
         case LoadsState::StridedVectorize:
-          VecLdCost += TTI.getStridedMemoryOpCost(Instruction::Load, SubVecTy,
-                                                  LI0->getPointerOperand(),
-                                                  /*VariableMask=*/false,
-                                                  CommonAlignment, CostKind) +
+          VecLdCost += TTI.getMemIntrinsicInstrCost(
+                           Intrinsic::experimental_vp_strided_load, SubVecTy,
+                           LI0->getPointerOperand(),
+                           /*VariableMask=*/false, CommonAlignment, CostKind) +
                        VectorGEPCost;
           break;
         case LoadsState::CompressVectorize:
@@ -13208,8 +13208,9 @@ void BoUpSLP::transformNodes() {
                                  BaseLI->getPointerAddressSpace(), CostKind,
                                  TTI::OperandValueInfo()) +
             ::getShuffleCost(*TTI, TTI::SK_Reverse, VecTy, Mask, CostKind);
-        InstructionCost StridedCost = TTI->getStridedMemoryOpCost(
-            Instruction::Load, VecTy, BaseLI->getPointerOperand(),
+        InstructionCost StridedCost = TTI->getMemIntrinsicInstrCost(
+            Intrinsic::experimental_vp_strided_load, VecTy,
+            BaseLI->getPointerOperand(),
             /*VariableMask=*/false, CommonAlignment, CostKind, BaseLI);
         if (StridedCost < OriginalVecCost || ForceStridedLoads) {
           // Strided load is more profitable than consecutive load + reverse -
@@ -13243,8 +13244,9 @@ void BoUpSLP::transformNodes() {
                                  BaseSI->getPointerAddressSpace(), CostKind,
                                  TTI::OperandValueInfo()) +
             ::getShuffleCost(*TTI, TTI::SK_Reverse, VecTy, Mask, CostKind);
-        InstructionCost StridedCost = TTI->getStridedMemoryOpCost(
-            Instruction::Store, VecTy, BaseSI->getPointerOperand(),
+        InstructionCost StridedCost = TTI->getMemIntrinsicInstrCost(
+            Intrinsic::experimental_vp_strided_store, VecTy,
+            BaseSI->getPointerOperand(),
             /*VariableMask=*/false, CommonAlignment, CostKind, BaseSI);
         if (StridedCost < OriginalVecCost)
           // Strided store is more profitable than reverse + consecutive store -
@@ -15008,8 +15010,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
       case TreeEntry::StridedVectorize: {
         Align CommonAlignment =
             computeCommonAlignment<LoadInst>(UniqueValues.getArrayRef());
-        VecLdCost = TTI->getStridedMemoryOpCost(
-            Instruction::Load, VecTy, LI0->getPointerOperand(),
+        VecLdCost = TTI->getMemIntrinsicInstrCost(
+            Intrinsic::experimental_vp_strided_load, VecTy,
+            LI0->getPointerOperand(),
             /*VariableMask=*/false, CommonAlignment, CostKind);
         break;
       }
@@ -15101,8 +15104,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
       if (E->State == TreeEntry::StridedVectorize) {
         Align CommonAlignment =
             computeCommonAlignment<StoreInst>(UniqueValues.getArrayRef());
-        VecStCost = TTI->getStridedMemoryOpCost(
-            Instruction::Store, VecTy, BaseSI->getPointerOperand(),
+        VecStCost = TTI->getMemIntrinsicInstrCost(
+            Intrinsic::experimental_vp_strided_store, VecTy,
+            BaseSI->getPointerOperand(),
             /*VariableMask=*/false, CommonAlignment, CostKind);
       } else {
         assert(E->State == TreeEntry::Vectorize &&

>From 5dfe7e7f97564da5b3deca36eb5a9aff629dc720 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Mon, 3 Nov 2025 01:46:36 -0800
Subject: [PATCH 3/8] [TTI] Remove getGatherScatterOpCost from
 Analysis/TargetTransformInfo

---
 .../llvm/Analysis/TargetTransformInfo.h       | 14 --------
 llvm/include/llvm/CodeGen/BasicTTIImpl.h      | 34 +++++++++++++------
 llvm/lib/Analysis/TargetTransformInfo.cpp     | 10 ------
 .../Transforms/Vectorize/LoopVectorize.cpp    |  9 +++--
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 21 ++++++------
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp |  8 +++--
 6 files changed, 46 insertions(+), 50 deletions(-)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index a307f8066254a..8d9e0817ec33a 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1559,20 +1559,6 @@ class TargetTransformInfo {
       unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
       TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
 
-  /// \return The cost of Gather or Scatter operation
-  /// \p Opcode - is a type of memory access Load or Store
-  /// \p DataTy - a vector type of the data to be loaded or stored
-  /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
-  /// \p VariableMask - true when the memory access is predicated with a mask
-  ///                   that is not a compile-time constant
-  /// \p Alignment - alignment of single element
-  /// \p I - the optional original context instruction, if one exists, e.g. the
-  ///        load/store to transform or the call to the gather/scatter intrinsic
-  LLVM_ABI InstructionCost getGatherScatterOpCost(
-      unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
-      Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
-      const Instruction *I = nullptr) const;
-
   /// \return The cost of Expand Load or Compress Store operation
   /// \p Opcode - is a type of memory access Load or Store
   /// \p Src - a vector type of the data to be loaded or stored
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 6a31bf555a848..97f3c7eaa2b2a 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1808,9 +1808,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
         if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
           Alignment = VPI->getPointerAlignment().valueOrOne();
         bool VarMask = isa<Constant>(ICA.getArgs()[2]);
-        return thisT()->getGatherScatterOpCost(
-            Instruction::Store, ICA.getArgTypes()[0], ICA.getArgs()[1], VarMask,
-            Alignment, CostKind, nullptr);
+        return thisT()->getMemIntrinsicInstrCost(
+            Intrinsic::vp_scatter, ICA.getArgTypes()[0], ICA.getArgs()[1],
+            VarMask, Alignment, CostKind, nullptr);
       }
       if (ICA.getID() == Intrinsic::vp_gather) {
         if (ICA.isTypeBasedOnly()) {
@@ -1824,9 +1824,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
         if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
           Alignment = VPI->getPointerAlignment().valueOrOne();
         bool VarMask = isa<Constant>(ICA.getArgs()[1]);
-        return thisT()->getGatherScatterOpCost(
-            Instruction::Load, ICA.getReturnType(), ICA.getArgs()[0], VarMask,
-            Alignment, CostKind, nullptr);
+        return thisT()->getMemIntrinsicInstrCost(
+            Intrinsic::vp_gather, ICA.getReturnType(), ICA.getArgs()[0],
+            VarMask, Alignment, CostKind, nullptr);
       }
 
       if (ICA.getID() == Intrinsic::vp_select ||
@@ -1931,16 +1931,17 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       const Value *Mask = Args[2];
       bool VarMask = !isa<Constant>(Mask);
       Align Alignment = I->getParamAlign(1).valueOrOne();
-      return thisT()->getGatherScatterOpCost(Instruction::Store,
-                                             ICA.getArgTypes()[0], Args[1],
-                                             VarMask, Alignment, CostKind, I);
+      return thisT()->getMemIntrinsicInstrCost(Intrinsic::masked_scatter,
+                                               ICA.getArgTypes()[0], Args[1],
+                                               VarMask, Alignment, CostKind, I);
     }
     case Intrinsic::masked_gather: {
       const Value *Mask = Args[1];
       bool VarMask = !isa<Constant>(Mask);
       Align Alignment = I->getParamAlign(0).valueOrOne();
-      return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
-                                             VarMask, Alignment, CostKind, I);
+      return thisT()->getMemIntrinsicInstrCost(Intrinsic::masked_gather, RetTy,
+                                               Args[0], VarMask, Alignment,
+                                               CostKind, I);
     }
     case Intrinsic::masked_compressstore: {
       const Value *Data = Args[0];
@@ -3031,6 +3032,17 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       return thisT()->getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
                                              Alignment, CostKind, I);
     }
+    case Intrinsic::masked_scatter:
+    case Intrinsic::masked_gather:
+    case Intrinsic::vp_scatter:
+    case Intrinsic::vp_gather: {
+      unsigned Opcode =
+          ((Id == Intrinsic::masked_gather) || (Id == Intrinsic::vp_gather))
+              ? Instruction::Load
+              : Instruction::Store;
+      return thisT()->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
+                                             Alignment, CostKind, I);
+    }
     default:
       llvm_unreachable("unexpected intrinsic");
     }
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 316ed4a966fd4..ed9a28521c707 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1191,16 +1191,6 @@ InstructionCost TargetTransformInfo::getMaskedMemoryOpCost(
   return Cost;
 }
 
-InstructionCost TargetTransformInfo::getGatherScatterOpCost(
-    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
-    Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
-  InstructionCost Cost = TTIImpl->getGatherScatterOpCost(
-      Opcode, DataTy, Ptr, VariableMask, Alignment, CostKind, I);
-  assert((!Cost.isValid() || Cost >= 0) &&
-         "TTI should not produce negative costs!");
-  return Cost;
-}
-
 InstructionCost TargetTransformInfo::getExpandCompressMemoryOpCost(
     unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
     TTI::TargetCostKind CostKind, const Instruction *I) const {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e5c3f17860103..3374188bc111e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5257,10 +5257,13 @@ LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
   if (!Legal->isUniform(Ptr, VF))
     PtrTy = toVectorTy(PtrTy, VF);
 
+  unsigned IID = (I->getOpcode() == Instruction::Load)
+                     ? Intrinsic::masked_gather
+                     : Intrinsic::masked_scatter;
   return TTI.getAddressComputationCost(PtrTy, nullptr, nullptr, CostKind) +
-         TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr,
-                                    Legal->isMaskRequired(I), Alignment,
-                                    CostKind, I);
+         TTI.getMemIntrinsicInstrCost(IID, VectorTy, Ptr,
+                                      Legal->isMaskRequired(I), Alignment,
+                                      CostKind, I);
 }
 
 InstructionCost
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index db50dbd68ed82..30ed08728bb7b 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7132,9 +7132,10 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
         ScalarGEPCost;
     // The cost of masked gather.
     InstructionCost MaskedGatherCost =
-        TTI.getGatherScatterOpCost(
-            Instruction::Load, VecTy, cast<LoadInst>(VL0)->getPointerOperand(),
-            /*VariableMask=*/false, CommonAlignment, CostKind) +
+        TTI.getMemIntrinsicInstrCost(Intrinsic::masked_gather, VecTy,
+                                     cast<LoadInst>(VL0)->getPointerOperand(),
+                                     /*VariableMask=*/false, CommonAlignment,
+                                     CostKind) +
         (ProfitableGatherPointers ? 0 : VectorGEPCost);
     InstructionCost GatherCost =
         getScalarizationOverhead(TTI, ScalarTy, VecTy, DemandedElts,
@@ -7256,11 +7257,11 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
                                         {}, CostKind);
           break;
         case LoadsState::ScatterVectorize:
-          VecLdCost += TTI.getGatherScatterOpCost(Instruction::Load, SubVecTy,
-                                                  LI0->getPointerOperand(),
-                                                  /*VariableMask=*/false,
-                                                  CommonAlignment, CostKind) +
-                       VectorGEPCost;
+          VecLdCost +=
+              TTI.getMemIntrinsicInstrCost(
+                  Intrinsic::masked_gather, SubVecTy, LI0->getPointerOperand(),
+                  /*VariableMask=*/false, CommonAlignment, CostKind) +
+              VectorGEPCost;
           break;
         case LoadsState::Gather:
           // Gathers are already calculated - ignore.
@@ -15062,8 +15063,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
       case TreeEntry::ScatterVectorize: {
         Align CommonAlignment =
             computeCommonAlignment<LoadInst>(UniqueValues.getArrayRef());
-        VecLdCost = TTI->getGatherScatterOpCost(
-            Instruction::Load, VecTy, LI0->getPointerOperand(),
+        VecLdCost = TTI->getMemIntrinsicInstrCost(
+            Intrinsic::masked_gather, VecTy, LI0->getPointerOperand(),
             /*VariableMask=*/false, CommonAlignment, CostKind);
         break;
       }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 1ee405a62aa68..3844c99c4f172 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3558,10 +3558,14 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
     if (!vputils::isSingleScalar(getAddr()))
       PtrTy = toVectorTy(PtrTy, VF);
 
+    unsigned IID = isa<VPWidenLoadRecipe>(this)      ? Intrinsic::masked_gather
+                   : isa<VPWidenStoreRecipe>(this)   ? Intrinsic::masked_scatter
+                   : isa<VPWidenLoadEVLRecipe>(this) ? Intrinsic::vp_gather
+                                                     : Intrinsic::vp_scatter;
     return Ctx.TTI.getAddressComputationCost(PtrTy, nullptr, nullptr,
                                              Ctx.CostKind) +
-           Ctx.TTI.getGatherScatterOpCost(Opcode, Ty, Ptr, IsMasked, Alignment,
-                                          Ctx.CostKind, &Ingredient);
+           Ctx.TTI.getMemIntrinsicInstrCost(IID, Ty, Ptr, IsMasked, Alignment,
+                                            Ctx.CostKind, &Ingredient);
   }
 
   InstructionCost Cost = 0;

>From cacfa70c1d4659fc0c7f3f47214686348ef3dd3f Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Mon, 3 Nov 2025 22:42:26 -0800
Subject: [PATCH 4/8] [TTI] Remove getMaskedMemoryOpCost from
 Analysis/TargetTransformInfo

---
 .../llvm/Analysis/TargetTransformInfo.h       |  5 ---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h      | 29 +++++++++++-----
 llvm/lib/Analysis/TargetTransformInfo.cpp     |  9 -----
 .../Transforms/Vectorize/LoopVectorize.cpp    |  7 ++--
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 25 +++++++-------
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 33 +++++++++++--------
 6 files changed, 58 insertions(+), 50 deletions(-)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 8d9e0817ec33a..a487f18cc787c 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1554,11 +1554,6 @@ class TargetTransformInfo {
       OperandValueInfo OpdInfo = {OK_AnyValue, OP_None},
       const Instruction *I = nullptr) const;
 
-  /// \return The cost of masked Load and Store instructions.
-  LLVM_ABI InstructionCost getMaskedMemoryOpCost(
-      unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
-      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
-
   /// \return The cost of Expand Load or Compress Store operation
   /// \p Opcode - is a type of memory access Load or Store
   /// \p Src - a vector type of the data to be loaded or stored
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 97f3c7eaa2b2a..6b56eb3d8739d 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1606,10 +1606,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
 
     // Firstly, the cost of load/store operation.
     InstructionCost Cost;
-    if (UseMaskForCond || UseMaskForGaps)
-      Cost = thisT()->getMaskedMemoryOpCost(Opcode, VecTy, Alignment,
-                                            AddressSpace, CostKind);
-    else
+    if (UseMaskForCond || UseMaskForGaps) {
+      unsigned IID = (Opcode == Instruction::Load) ? Intrinsic::masked_load
+                                                   : Intrinsic::masked_store;
+      Cost = thisT()->getMemIntrinsicInstrCost(
+          IID, VecTy, /*Ptr*/ nullptr, /*VariableMask*/ false, Alignment,
+          CostKind, /*Instruction*/ nullptr);
+    } else
       Cost = thisT()->getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace,
                                       CostKind);
 
@@ -2408,14 +2411,16 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     case Intrinsic::masked_store: {
       Type *Ty = Tys[0];
       Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
-      return thisT()->getMaskedMemoryOpCost(Instruction::Store, Ty, TyAlign, 0,
-                                            CostKind);
+      return thisT()->getMemIntrinsicInstrCost(
+          Intrinsic::masked_store, Ty,
+          /*Ptr*/ nullptr, /*VariableMask*/ false, TyAlign, CostKind, nullptr);
     }
     case Intrinsic::masked_load: {
       Type *Ty = RetTy;
       Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
-      return thisT()->getMaskedMemoryOpCost(Instruction::Load, Ty, TyAlign, 0,
-                                            CostKind);
+      return thisT()->getMemIntrinsicInstrCost(
+          Intrinsic::masked_load, Ty,
+          /*Ptr*/ nullptr, /*VariableMask*/ false, TyAlign, CostKind, nullptr);
     }
     case Intrinsic::experimental_vp_strided_store: {
       auto *Ty = cast<VectorType>(ICA.getArgTypes()[0]);
@@ -3043,6 +3048,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       return thisT()->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                              Alignment, CostKind, I);
     }
+    case Intrinsic::masked_load:
+    case Intrinsic::masked_store: {
+      unsigned Opcode = (Id == Intrinsic::masked_load) ? Instruction::Load
+                                                       : Instruction::Store;
+      unsigned AS = Ptr ? Ptr->getType()->getPointerAddressSpace() : 0;
+      return thisT()->getMaskedMemoryOpCost(Opcode, DataTy, Alignment, AS,
+                                            CostKind);
+    }
     default:
       llvm_unreachable("unexpected intrinsic");
     }
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index ed9a28521c707..ec3e233da8e5d 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1182,15 +1182,6 @@ InstructionCost TargetTransformInfo::getMemoryOpCost(
   return Cost;
 }
 
-InstructionCost TargetTransformInfo::getMaskedMemoryOpCost(
-    unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
-    TTI::TargetCostKind CostKind) const {
-  InstructionCost Cost = TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment,
-                                                        AddressSpace, CostKind);
-  assert(Cost >= 0 && "TTI should not produce negative costs!");
-  return Cost;
-}
-
 InstructionCost TargetTransformInfo::getExpandCompressMemoryOpCost(
     unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
     TTI::TargetCostKind CostKind, const Instruction *I) const {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3374188bc111e..dc9e30345379b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5197,8 +5197,11 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
   const Align Alignment = getLoadStoreAlignment(I);
   InstructionCost Cost = 0;
   if (Legal->isMaskRequired(I)) {
-    Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
-                                      CostKind);
+    unsigned IID = (I->getOpcode() == Instruction::Load)
+                       ? Intrinsic::masked_load
+                       : Intrinsic::masked_store;
+    Cost += TTI.getMemIntrinsicInstrCost(
+        IID, VectorTy, Ptr, /*VariableMask*/ false, Alignment, CostKind);
   } else {
     TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(I->getOperand(0));
     Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 30ed08728bb7b..29228c9ec811c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6848,9 +6848,9 @@ static bool isMaskedLoadCompress(
       ScalarLoadsCost;
   InstructionCost LoadCost = 0;
   if (IsMasked) {
-    LoadCost =
-        TTI.getMaskedMemoryOpCost(Instruction::Load, LoadVecTy, CommonAlignment,
-                                  LI->getPointerAddressSpace(), CostKind);
+    LoadCost = TTI.getMemIntrinsicInstrCost(
+        Intrinsic::masked_load, LoadVecTy, LI->getPointerOperand(),
+        /*VariableMask*/ false, CommonAlignment, CostKind);
   } else {
     LoadCost =
         TTI.getMemoryOpCost(Instruction::Load, LoadVecTy, CommonAlignment,
@@ -7249,12 +7249,13 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
                        VectorGEPCost;
           break;
         case LoadsState::CompressVectorize:
-          VecLdCost += TTI.getMaskedMemoryOpCost(
-                           Instruction::Load, SubVecTy, CommonAlignment,
-                           LI0->getPointerAddressSpace(), CostKind) +
-                       VectorGEPCost +
-                       ::getShuffleCost(TTI, TTI::SK_PermuteSingleSrc, SubVecTy,
-                                        {}, CostKind);
+          VecLdCost +=
+              TTI.getMemIntrinsicInstrCost(
+                  Intrinsic::masked_load, SubVecTy, LI0->getPointerOperand(),
+                  /*VariableMask*/ false, CommonAlignment, CostKind) +
+              VectorGEPCost +
+              ::getShuffleCost(TTI, TTI::SK_PermuteSingleSrc, SubVecTy, {},
+                               CostKind);
           break;
         case LoadsState::ScatterVectorize:
           VecLdCost +=
@@ -15044,9 +15045,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
               Instruction::Load, LoadVecTy, InterleaveFactor, {},
               CommonAlignment, LI0->getPointerAddressSpace(), CostKind);
         } else if (IsMasked) {
-          VecLdCost = TTI->getMaskedMemoryOpCost(
-              Instruction::Load, LoadVecTy, CommonAlignment,
-              LI0->getPointerAddressSpace(), CostKind);
+          VecLdCost = TTI->getMemIntrinsicInstrCost(
+              Intrinsic::masked_load, LoadVecTy, LI0->getPointerOperand(),
+              /*VariableMask*/ false, CommonAlignment, CostKind);
           // TODO: include this cost into CommonCost.
           VecLdCost += ::getShuffleCost(*TTI, TTI::SK_PermuteSingleSrc,
                                         LoadVecTy, CompressMask, CostKind);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 3844c99c4f172..28c972bc90041 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3570,8 +3570,11 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
 
   InstructionCost Cost = 0;
   if (IsMasked) {
-    Cost +=
-        Ctx.TTI.getMaskedMemoryOpCost(Opcode, Ty, Alignment, AS, Ctx.CostKind);
+    unsigned IID = isa<VPWidenLoadRecipe>(this) ? Intrinsic::masked_load
+                                                : Intrinsic::masked_store;
+    const Value *Ptr = getLoadStorePointerOperand(&Ingredient);
+    Cost += Ctx.TTI.getMemIntrinsicInstrCost(
+        IID, Ty, Ptr, /*VariableMask*/ false, Alignment, Ctx.CostKind);
   } else {
     TTI::OperandValueInfo OpInfo = Ctx.getOperandInfo(
         isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(this) ? getOperand(0)
@@ -3681,16 +3684,17 @@ InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
   if (!Consecutive || IsMasked)
     return VPWidenMemoryRecipe::computeCost(VF, Ctx);
 
-  // We need to use the getMaskedMemoryOpCost() instead of getMemoryOpCost()
+  // We need to use the getMemIntrinsicInstrCost() instead of getMemoryOpCost()
   // here because the EVL recipes using EVL to replace the tail mask. But in the
   // legacy model, it will always calculate the cost of mask.
-  // TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we
+  // TODO: Use getMemoryOpCost() instead of getMemIntrinsicInstrCost() when we
   // don't need to compare to the legacy cost model.
   Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF);
-  unsigned AS = cast<PointerType>(Ctx.Types.inferScalarType(getAddr()))
-                    ->getAddressSpace();
-  InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
-      Instruction::Load, Ty, Alignment, AS, Ctx.CostKind);
+  const Align Alignment = getLoadStoreAlignment(&Ingredient);
+  const Value *Ptr = getLoadStorePointerOperand(&Ingredient);
+  InstructionCost Cost = Ctx.TTI.getMemIntrinsicInstrCost(
+      Intrinsic::masked_load, Ty, Ptr, /*VariableMask*/ false, Alignment,
+      Ctx.CostKind);
   if (!Reverse)
     return Cost;
 
@@ -3790,16 +3794,17 @@ InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF,
   if (!Consecutive || IsMasked)
     return VPWidenMemoryRecipe::computeCost(VF, Ctx);
 
-  // We need to use the getMaskedMemoryOpCost() instead of getMemoryOpCost()
+  // We need to use the getMemIntrinsicInstrCost() instead of getMemoryOpCost()
   // here because the EVL recipes using EVL to replace the tail mask. But in the
   // legacy model, it will always calculate the cost of mask.
-  // TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we
+  // TODO: Use getMemoryOpCost() instead of getMemIntrinsicInstrCost() when we
   // don't need to compare to the legacy cost model.
   Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF);
-  unsigned AS = cast<PointerType>(Ctx.Types.inferScalarType(getAddr()))
-                    ->getAddressSpace();
-  InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
-      Instruction::Store, Ty, Alignment, AS, Ctx.CostKind);
+  const Align Alignment = getLoadStoreAlignment(&Ingredient);
+  const Value *Ptr = getLoadStorePointerOperand(&Ingredient);
+  InstructionCost Cost = Ctx.TTI.getMemIntrinsicInstrCost(
+      Intrinsic::masked_store, Ty, Ptr, /*VariableMask*/ false, Alignment,
+      Ctx.CostKind);
   if (!Reverse)
     return Cost;
 

>From c6a9488c36347dba7c98f45c6fb576bfd5f36076 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Mon, 3 Nov 2025 23:21:36 -0800
Subject: [PATCH 5/8] [TTI] Remove getExpandCompressMemoryOpCost from
 Analysis/TargetTransformInfo

---
 .../llvm/Analysis/TargetTransformInfo.h       | 13 ------------
 llvm/include/llvm/CodeGen/BasicTTIImpl.h      | 20 +++++++++++++------
 llvm/lib/Analysis/TargetTransformInfo.cpp     |  9 ---------
 3 files changed, 14 insertions(+), 28 deletions(-)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index a487f18cc787c..cca0f2f27b32a 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1554,19 +1554,6 @@ class TargetTransformInfo {
       OperandValueInfo OpdInfo = {OK_AnyValue, OP_None},
       const Instruction *I = nullptr) const;
 
-  /// \return The cost of Expand Load or Compress Store operation
-  /// \p Opcode - is a type of memory access Load or Store
-  /// \p Src - a vector type of the data to be loaded or stored
-  /// \p VariableMask - true when the memory access is predicated with a mask
-  ///                   that is not a compile-time constant
-  /// \p Alignment - alignment of single element
-  /// \p I - the optional original context instruction, if one exists, e.g. the
-  ///        load/store to transform or the call to the gather/scatter intrinsic
-  LLVM_ABI InstructionCost getExpandCompressMemoryOpCost(
-      unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
-      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
-      const Instruction *I = nullptr) const;
-
   /// \return The cost of the interleaved memory operation.
   /// \p Opcode is the memory operation code
   /// \p VecTy is the vector type of the interleaved access.
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 6b56eb3d8739d..ff8a19dd1a9ce 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1950,16 +1950,16 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       const Value *Data = Args[0];
       const Value *Mask = Args[2];
       Align Alignment = I->getParamAlign(1).valueOrOne();
-      return thisT()->getExpandCompressMemoryOpCost(
-          Instruction::Store, Data->getType(), !isa<Constant>(Mask), Alignment,
-          CostKind, I);
+      return thisT()->getMemIntrinsicInstrCost(
+          Intrinsic::masked_compressstore, Data->getType(), Args[1],
+          !isa<Constant>(Mask), Alignment, CostKind, I);
     }
     case Intrinsic::masked_expandload: {
       const Value *Mask = Args[1];
       Align Alignment = I->getParamAlign(0).valueOrOne();
-      return thisT()->getExpandCompressMemoryOpCost(Instruction::Load, RetTy,
-                                                    !isa<Constant>(Mask),
-                                                    Alignment, CostKind, I);
+      return thisT()->getMemIntrinsicInstrCost(
+          Intrinsic::masked_expandload, RetTy, Args[0], !isa<Constant>(Mask),
+          Alignment, CostKind, I);
     }
     case Intrinsic::experimental_vp_strided_store: {
       const Value *Data = Args[0];
@@ -3056,6 +3056,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       return thisT()->getMaskedMemoryOpCost(Opcode, DataTy, Alignment, AS,
                                             CostKind);
     }
+    case Intrinsic::masked_compressstore:
+    case Intrinsic::masked_expandload: {
+      unsigned Opcode = (Id == Intrinsic::masked_expandload)
+                            ? Instruction::Load
+                            : Instruction::Store;
+      return thisT()->getExpandCompressMemoryOpCost(
+          Opcode, DataTy, VariableMask, Alignment, CostKind, I);
+    }
     default:
       llvm_unreachable("unexpected intrinsic");
     }
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index ec3e233da8e5d..e9ea50f7d98b8 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1182,15 +1182,6 @@ InstructionCost TargetTransformInfo::getMemoryOpCost(
   return Cost;
 }
 
-InstructionCost TargetTransformInfo::getExpandCompressMemoryOpCost(
-    unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
-    TTI::TargetCostKind CostKind, const Instruction *I) const {
-  InstructionCost Cost = TTIImpl->getExpandCompressMemoryOpCost(
-      Opcode, DataTy, VariableMask, Alignment, CostKind, I);
-  assert(Cost >= 0 && "TTI should not produce negative costs!");
-  return Cost;
-}
-
 InstructionCost TargetTransformInfo::getInterleavedMemoryOpCost(
     unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
     Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,

>From 5481271f15fc73cd07cbf10a3f2c7b721d9ca16f Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Tue, 4 Nov 2025 01:21:35 -0800
Subject: [PATCH 6/8] Use true for default variable mask for masked load/store

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h        | 6 +++---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +-
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 6 +++---
 llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp  | 6 +++---
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index ff8a19dd1a9ce..859c2ec321334 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1610,7 +1610,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       unsigned IID = (Opcode == Instruction::Load) ? Intrinsic::masked_load
                                                    : Intrinsic::masked_store;
       Cost = thisT()->getMemIntrinsicInstrCost(
-          IID, VecTy, /*Ptr*/ nullptr, /*VariableMask*/ false, Alignment,
+          IID, VecTy, /*Ptr*/ nullptr, /*VariableMask*/ true, Alignment,
           CostKind, /*Instruction*/ nullptr);
     } else
       Cost = thisT()->getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace,
@@ -2413,14 +2413,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
       return thisT()->getMemIntrinsicInstrCost(
           Intrinsic::masked_store, Ty,
-          /*Ptr*/ nullptr, /*VariableMask*/ false, TyAlign, CostKind, nullptr);
+          /*Ptr*/ nullptr, /*VariableMask*/ true, TyAlign, CostKind, nullptr);
     }
     case Intrinsic::masked_load: {
       Type *Ty = RetTy;
       Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
       return thisT()->getMemIntrinsicInstrCost(
           Intrinsic::masked_load, Ty,
-          /*Ptr*/ nullptr, /*VariableMask*/ false, TyAlign, CostKind, nullptr);
+          /*Ptr*/ nullptr, /*VariableMask*/ true, TyAlign, CostKind, nullptr);
     }
     case Intrinsic::experimental_vp_strided_store: {
       auto *Ty = cast<VectorType>(ICA.getArgTypes()[0]);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index dc9e30345379b..bbb30ddc446a7 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5201,7 +5201,7 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
                        ? Intrinsic::masked_load
                        : Intrinsic::masked_store;
     Cost += TTI.getMemIntrinsicInstrCost(
-        IID, VectorTy, Ptr, /*VariableMask*/ false, Alignment, CostKind);
+        IID, VectorTy, Ptr, /*VariableMask*/ true, Alignment, CostKind);
   } else {
     TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(I->getOperand(0));
     Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 29228c9ec811c..b7eb46b13f090 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6850,7 +6850,7 @@ static bool isMaskedLoadCompress(
   if (IsMasked) {
     LoadCost = TTI.getMemIntrinsicInstrCost(
         Intrinsic::masked_load, LoadVecTy, LI->getPointerOperand(),
-        /*VariableMask*/ false, CommonAlignment, CostKind);
+        /*VariableMask*/ true, CommonAlignment, CostKind);
   } else {
     LoadCost =
         TTI.getMemoryOpCost(Instruction::Load, LoadVecTy, CommonAlignment,
@@ -7252,7 +7252,7 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
           VecLdCost +=
               TTI.getMemIntrinsicInstrCost(
                   Intrinsic::masked_load, SubVecTy, LI0->getPointerOperand(),
-                  /*VariableMask*/ false, CommonAlignment, CostKind) +
+                  /*VariableMask*/ true, CommonAlignment, CostKind) +
               VectorGEPCost +
               ::getShuffleCost(TTI, TTI::SK_PermuteSingleSrc, SubVecTy, {},
                                CostKind);
@@ -15047,7 +15047,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
         } else if (IsMasked) {
           VecLdCost = TTI->getMemIntrinsicInstrCost(
               Intrinsic::masked_load, LoadVecTy, LI0->getPointerOperand(),
-              /*VariableMask*/ false, CommonAlignment, CostKind);
+              /*VariableMask*/ true, CommonAlignment, CostKind);
           // TODO: include this cost into CommonCost.
           VecLdCost += ::getShuffleCost(*TTI, TTI::SK_PermuteSingleSrc,
                                         LoadVecTy, CompressMask, CostKind);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 28c972bc90041..8767f53ab2d3a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3574,7 +3574,7 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
                                                 : Intrinsic::masked_store;
     const Value *Ptr = getLoadStorePointerOperand(&Ingredient);
     Cost += Ctx.TTI.getMemIntrinsicInstrCost(
-        IID, Ty, Ptr, /*VariableMask*/ false, Alignment, Ctx.CostKind);
+        IID, Ty, Ptr, /*VariableMask*/ true, Alignment, Ctx.CostKind);
   } else {
     TTI::OperandValueInfo OpInfo = Ctx.getOperandInfo(
         isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(this) ? getOperand(0)
@@ -3693,7 +3693,7 @@ InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
   const Align Alignment = getLoadStoreAlignment(&Ingredient);
   const Value *Ptr = getLoadStorePointerOperand(&Ingredient);
   InstructionCost Cost = Ctx.TTI.getMemIntrinsicInstrCost(
-      Intrinsic::masked_load, Ty, Ptr, /*VariableMask*/ false, Alignment,
+      Intrinsic::masked_load, Ty, Ptr, /*VariableMask*/ true, Alignment,
       Ctx.CostKind);
   if (!Reverse)
     return Cost;
@@ -3803,7 +3803,7 @@ InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF,
   const Align Alignment = getLoadStoreAlignment(&Ingredient);
   const Value *Ptr = getLoadStorePointerOperand(&Ingredient);
   InstructionCost Cost = Ctx.TTI.getMemIntrinsicInstrCost(
-      Intrinsic::masked_store, Ty, Ptr, /*VariableMask*/ false, Alignment,
+      Intrinsic::masked_store, Ty, Ptr, /*VariableMask*/ true, Alignment,
       Ctx.CostKind);
   if (!Reverse)
     return Cost;

>From 65e71a25c361d00c34e3e7ab2d0619954bd571d8 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Tue, 4 Nov 2025 03:19:04 -0800
Subject: [PATCH 7/8] Fix build error

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 859c2ec321334..a35b8158a2a8a 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -3023,11 +3023,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
   }
 
   /// Get memory intrinsic cost based on arguments.
-  InstructionCost getMemIntrinsicInstrCost(Intrinsic::ID Id, Type *DataTy,
-                                           const Value *Ptr, bool VariableMask,
-                                           Align Alignment,
-                                           TTI::TargetCostKind CostKind,
-                                           const Instruction *I) const {
+  InstructionCost
+  getMemIntrinsicInstrCost(Intrinsic::ID Id, Type *DataTy, const Value *Ptr,
+                           bool VariableMask, Align Alignment,
+                           TTI::TargetCostKind CostKind,
+                           const Instruction *I) const override {
     switch (Id) {
     case Intrinsic::experimental_vp_strided_load:
     case Intrinsic::experimental_vp_strided_store: {

>From 8104387a72e41c3a05523617147751b38b21d2c2 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Tue, 4 Nov 2025 23:57:17 -0800
Subject: [PATCH 8/8] Address comments

---
 .../include/llvm/Analysis/TargetTransformInfo.h |  4 ++--
 llvm/include/llvm/CodeGen/BasicTTIImpl.h        | 17 ++++++++---------
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp |  4 ++--
 3 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index cca0f2f27b32a..ddbdaaab92a23 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1632,8 +1632,8 @@ class TargetTransformInfo {
   LLVM_ABI InstructionCost getIntrinsicInstrCost(
       const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const;
 
-  /// \returns The cost of memory Intrinsic instructions.
-  /// It is used when IntrinsicInst is not meterialized.
+  /// \returns The cost of memory intrinsic instructions.
+  /// Used when IntrinsicInst is not materialized.
   /// \p DataTy - a vector type of the data to be loaded or stored
   /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
   /// \p VariableMask - true when the memory access is predicated with a mask
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index a35b8158a2a8a..cf0a1ce94c8a0 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1607,8 +1607,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     // Firstly, the cost of load/store operation.
     InstructionCost Cost;
     if (UseMaskForCond || UseMaskForGaps) {
-      unsigned IID = (Opcode == Instruction::Load) ? Intrinsic::masked_load
-                                                   : Intrinsic::masked_store;
+      unsigned IID = Opcode == Instruction::Load ? Intrinsic::masked_load
+                                                 : Intrinsic::masked_store;
       Cost = thisT()->getMemIntrinsicInstrCost(
           IID, VecTy, /*Ptr*/ nullptr, /*VariableMask*/ true, Alignment,
           CostKind, /*Instruction*/ nullptr);
@@ -3031,7 +3031,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     switch (Id) {
     case Intrinsic::experimental_vp_strided_load:
     case Intrinsic::experimental_vp_strided_store: {
-      unsigned Opcode = (Id == Intrinsic::experimental_vp_strided_load)
+      unsigned Opcode = Id == Intrinsic::experimental_vp_strided_load
                             ? Instruction::Load
                             : Instruction::Store;
       return thisT()->getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
@@ -3042,7 +3042,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     case Intrinsic::vp_scatter:
     case Intrinsic::vp_gather: {
       unsigned Opcode =
-          ((Id == Intrinsic::masked_gather) || (Id == Intrinsic::vp_gather))
+          (Id == Intrinsic::masked_gather || Id == Intrinsic::vp_gather)
               ? Instruction::Load
               : Instruction::Store;
       return thisT()->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
@@ -3050,17 +3050,16 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     }
     case Intrinsic::masked_load:
     case Intrinsic::masked_store: {
-      unsigned Opcode = (Id == Intrinsic::masked_load) ? Instruction::Load
-                                                       : Instruction::Store;
+      unsigned Opcode =
+          Id == Intrinsic::masked_load ? Instruction::Load : Instruction::Store;
       unsigned AS = Ptr ? Ptr->getType()->getPointerAddressSpace() : 0;
       return thisT()->getMaskedMemoryOpCost(Opcode, DataTy, Alignment, AS,
                                             CostKind);
     }
     case Intrinsic::masked_compressstore:
     case Intrinsic::masked_expandload: {
-      unsigned Opcode = (Id == Intrinsic::masked_expandload)
-                            ? Instruction::Load
-                            : Instruction::Store;
+      unsigned Opcode = Id == Intrinsic::masked_expandload ? Instruction::Load
+                                                           : Instruction::Store;
       return thisT()->getExpandCompressMemoryOpCost(
           Opcode, DataTy, VariableMask, Alignment, CostKind, I);
     }
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index bbb30ddc446a7..bc5e8d54bdb75 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5197,7 +5197,7 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
   const Align Alignment = getLoadStoreAlignment(I);
   InstructionCost Cost = 0;
   if (Legal->isMaskRequired(I)) {
-    unsigned IID = (I->getOpcode() == Instruction::Load)
+    unsigned IID = I->getOpcode() == Instruction::Load
                        ? Intrinsic::masked_load
                        : Intrinsic::masked_store;
     Cost += TTI.getMemIntrinsicInstrCost(
@@ -5260,7 +5260,7 @@ LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
   if (!Legal->isUniform(Ptr, VF))
     PtrTy = toVectorTy(PtrTy, VF);
 
-  unsigned IID = (I->getOpcode() == Instruction::Load)
+  unsigned IID = I->getOpcode() == Instruction::Load
                      ? Intrinsic::masked_gather
                      : Intrinsic::masked_scatter;
   return TTI.getAddressComputationCost(PtrTy, nullptr, nullptr, CostKind) +



More information about the llvm-commits mailing list