[llvm] [WIP][TTI] Replace getStridedMemoryOpCost with getIntrinsicInstrCost (PR #165532)

via llvm-commits llvm-commits at lists.llvm.org
Sun Nov 2 22:45:54 PST 2025


llvmbot wrote:


@llvm/pr-subscribers-backend-risc-v

Author: Shih-Po Hung (arcbbb)

Changes:

In #160470, there was a discussion about exploring a general approach for handling memory intrinsics.

This patch adds an alignment field to IntrinsicCostAttributes so that the type-based cost queries issued by SLP and the Loop Vectorizer before IR is materialized can carry alignment information.

Candidates for adopting this approach are getMaskedMemoryOpCost, getGatherScatterOpCost, getExpandCompressMemoryOpCost, and getStridedMemoryOpCost; this patch starts with getStridedMemoryOpCost. A sketch of the intended call pattern follows.
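
As an illustration, here is a minimal sketch (not part of the patch): the helper name, VecTy, and CommonAlignment are hypothetical, while the constructor and query signatures follow the diff below.

```cpp
// Minimal sketch (hypothetical helper, not part of the patch): routes a
// type-based strided-load cost query through getIntrinsicInstrCost using
// the new alignment-carrying IntrinsicCostAttributes constructor.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;

static InstructionCost
costStridedLoad(const TargetTransformInfo &TTI, VectorType *VecTy,
                Align CommonAlignment, TTI::TargetCostKind CostKind) {
  // Previously: TTI.getStridedMemoryOpCost(Instruction::Load, VecTy,
  //                 /*Ptr=*/nullptr, /*VariableMask=*/false,
  //                 CommonAlignment, CostKind);
  IntrinsicCostAttributes ICA(Intrinsic::experimental_vp_strided_load, VecTy,
                              /*Tys=*/{}, CommonAlignment,
                              /*VariableMask=*/false);
  return TTI.getIntrinsicInstrCost(ICA, CostKind);
}
```

This mirrors the SLPVectorizer changes in the diff, where the strided-load query is expressed as a type-based experimental_vp_strided_load cost.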

---
Full diff: https://github.com/llvm/llvm-project/pull/165532.diff


7 Files Affected:

- (modified) llvm/include/llvm/Analysis/TargetTransformInfo.h (+9-14) 
- (modified) llvm/include/llvm/Analysis/TargetTransformInfoImpl.h (-8) 
- (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+18-27) 
- (modified) llvm/lib/Analysis/TargetTransformInfo.cpp (+8-9) 
- (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (+37-23) 
- (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h (-6) 
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+35-16) 


``````````diff
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 7b7dc1b46dd80..0270a65eac776 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -135,6 +135,9 @@ class IntrinsicCostAttributes {
   InstructionCost ScalarizationCost = InstructionCost::getInvalid();
   TargetLibraryInfo const *LibInfo = nullptr;
 
+  MaybeAlign Alignment;
+  bool VariableMask = false;
+
 public:
   LLVM_ABI IntrinsicCostAttributes(
       Intrinsic::ID Id, const CallBase &CI,
@@ -146,6 +149,10 @@ class IntrinsicCostAttributes {
       FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr,
       InstructionCost ScalarCost = InstructionCost::getInvalid());
 
+  LLVM_ABI IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
+                                   ArrayRef<Type *> Tys, Align Alignment,
+                                   bool VariableMask = false);
+
   LLVM_ABI IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
                                    ArrayRef<const Value *> Args);
 
@@ -160,6 +167,8 @@ class IntrinsicCostAttributes {
   const IntrinsicInst *getInst() const { return II; }
   Type *getReturnType() const { return RetTy; }
   FastMathFlags getFlags() const { return FMF; }
+  MaybeAlign getAlign() const { return Alignment; }
+  bool getVariableMask() const { return VariableMask; }
   InstructionCost getScalarizationCost() const { return ScalarizationCost; }
   const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }
   const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
@@ -1586,20 +1595,6 @@ class TargetTransformInfo {
       TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
       const Instruction *I = nullptr) const;
 
-  /// \return The cost of strided memory operations.
-  /// \p Opcode - is a type of memory access Load or Store
-  /// \p DataTy - a vector type of the data to be loaded or stored
-  /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
-  /// \p VariableMask - true when the memory access is predicated with a mask
-  ///                   that is not a compile-time constant
-  /// \p Alignment - alignment of single element
-  /// \p I - the optional original context instruction, if one exists, e.g. the
-  ///        load/store to transform or the call to the gather/scatter intrinsic
-  LLVM_ABI InstructionCost getStridedMemoryOpCost(
-      unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
-      Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
-      const Instruction *I = nullptr) const;
-
   /// \return The cost of the interleaved memory operation.
   /// \p Opcode is the memory operation code
   /// \p VecTy is the vector type of the interleaved access.
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 4cd607c0d0c8d..a9591704c9d14 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -862,14 +862,6 @@ class TargetTransformInfoImplBase {
     return 1;
   }
 
-  virtual InstructionCost
-  getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
-                         bool VariableMask, Align Alignment,
-                         TTI::TargetCostKind CostKind,
-                         const Instruction *I = nullptr) const {
-    return InstructionCost::getInvalid();
-  }
-
   virtual InstructionCost getInterleavedMemoryOpCost(
       unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
       Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 76b6c8ec68c72..fbd481d1c794f 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1574,18 +1574,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
                                        /*IsGatherScatter*/ true, CostKind);
   }
 
-  InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
-                                         const Value *Ptr, bool VariableMask,
-                                         Align Alignment,
-                                         TTI::TargetCostKind CostKind,
-                                         const Instruction *I) const override {
-    // For a target without strided memory operations (or for an illegal
-    // operation type on one which does), assume we lower to a gather/scatter
-    // operation.  (Which may in turn be scalarized.)
-    return thisT()->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
-                                           Alignment, CostKind, I);
-  }
-
   InstructionCost getInterleavedMemoryOpCost(
       unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
       Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
@@ -1958,27 +1946,26 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     }
     case Intrinsic::experimental_vp_strided_store: {
       const Value *Data = Args[0];
-      const Value *Ptr = Args[1];
       const Value *Mask = Args[3];
       const Value *EVL = Args[4];
       bool VarMask = !isa<Constant>(Mask) || !isa<Constant>(EVL);
       Type *EltTy = cast<VectorType>(Data->getType())->getElementType();
       Align Alignment =
           I->getParamAlign(1).value_or(thisT()->DL.getABITypeAlign(EltTy));
-      return thisT()->getStridedMemoryOpCost(Instruction::Store,
-                                             Data->getType(), Ptr, VarMask,
-                                             Alignment, CostKind, I);
+      return thisT()->getCommonMaskedMemoryOpCost(
+          Instruction::Store, Data->getType(), Alignment, VarMask,
+          /*IsGatherScatter*/ true, CostKind);
     }
     case Intrinsic::experimental_vp_strided_load: {
-      const Value *Ptr = Args[0];
       const Value *Mask = Args[2];
       const Value *EVL = Args[3];
       bool VarMask = !isa<Constant>(Mask) || !isa<Constant>(EVL);
       Type *EltTy = cast<VectorType>(RetTy)->getElementType();
       Align Alignment =
           I->getParamAlign(0).value_or(thisT()->DL.getABITypeAlign(EltTy));
-      return thisT()->getStridedMemoryOpCost(Instruction::Load, RetTy, Ptr,
-                                             VarMask, Alignment, CostKind, I);
+      return thisT()->getCommonMaskedMemoryOpCost(
+          Instruction::Load, RetTy, Alignment, VarMask,
+          /*IsGatherScatter*/ true, CostKind);
     }
     case Intrinsic::stepvector: {
       if (isa<ScalableVectorType>(RetTy))
@@ -2418,17 +2405,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     }
     case Intrinsic::experimental_vp_strided_store: {
       auto *Ty = cast<VectorType>(ICA.getArgTypes()[0]);
-      Align Alignment = thisT()->DL.getABITypeAlign(Ty->getElementType());
-      return thisT()->getStridedMemoryOpCost(
-          Instruction::Store, Ty, /*Ptr=*/nullptr, /*VariableMask=*/true,
-          Alignment, CostKind, ICA.getInst());
+      Align Alignment = ICA.getAlign().value_or(
+          thisT()->DL.getABITypeAlign(Ty->getElementType()));
+      return thisT()->getCommonMaskedMemoryOpCost(
+          Instruction::Store, Ty, Alignment,
+          /*VariableMask=*/true,
+          /*IsGatherScatter*/ true, CostKind);
     }
     case Intrinsic::experimental_vp_strided_load: {
       auto *Ty = cast<VectorType>(ICA.getReturnType());
-      Align Alignment = thisT()->DL.getABITypeAlign(Ty->getElementType());
-      return thisT()->getStridedMemoryOpCost(
-          Instruction::Load, Ty, /*Ptr=*/nullptr, /*VariableMask=*/true,
-          Alignment, CostKind, ICA.getInst());
+      Align Alignment = ICA.getAlign().value_or(
+          thisT()->DL.getABITypeAlign(Ty->getElementType()));
+      return thisT()->getCommonMaskedMemoryOpCost(
+          Instruction::Load, Ty, Alignment,
+          /*VariableMask=*/true,
+          /*IsGatherScatter*/ true, CostKind);
     }
     case Intrinsic::vector_reduce_add:
     case Intrinsic::vector_reduce_mul:
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index c47a1c1b23a37..821017e985cc2 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -96,6 +96,14 @@ IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
   ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
 }
 
+IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
+                                                 ArrayRef<Type *> Tys,
+                                                 Align Alignment,
+                                                 bool VariableMask)
+    : RetTy(RTy), IID(Id), Alignment(Alignment), VariableMask(VariableMask) {
+  ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
+}
+
 IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty,
                                                  ArrayRef<const Value *> Args)
     : RetTy(Ty), IID(Id) {
@@ -1210,15 +1218,6 @@ InstructionCost TargetTransformInfo::getExpandCompressMemoryOpCost(
   return Cost;
 }
 
-InstructionCost TargetTransformInfo::getStridedMemoryOpCost(
-    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
-    Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
-  InstructionCost Cost = TTIImpl->getStridedMemoryOpCost(
-      Opcode, DataTy, Ptr, VariableMask, Alignment, CostKind, I);
-  assert(Cost >= 0 && "TTI should not produce negative costs!");
-  return Cost;
-}
-
 InstructionCost TargetTransformInfo::getInterleavedMemoryOpCost(
     unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
     Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 7bc0b5b394828..da74320af0821 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1172,29 +1172,6 @@ InstructionCost RISCVTTIImpl::getExpandCompressMemoryOpCost(
          LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
 }
 
-InstructionCost RISCVTTIImpl::getStridedMemoryOpCost(
-    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
-    Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
-  if (((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
-       !isLegalStridedLoadStore(DataTy, Alignment)) ||
-      (Opcode != Instruction::Load && Opcode != Instruction::Store))
-    return BaseT::getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
-                                         Alignment, CostKind, I);
-
-  if (CostKind == TTI::TCK_CodeSize)
-    return TTI::TCC_Basic;
-
-  // Cost is proportional to the number of memory operations implied.  For
-  // scalable vectors, we use an estimate on that number since we don't
-  // know exactly what VL will be.
-  auto &VTy = *cast<VectorType>(DataTy);
-  InstructionCost MemOpCost =
-      getMemoryOpCost(Opcode, VTy.getElementType(), Alignment, 0, CostKind,
-                      {TTI::OK_AnyValue, TTI::OP_None}, I);
-  unsigned NumLoads = getEstimatedVLFor(&VTy);
-  return NumLoads * MemOpCost;
-}
-
 InstructionCost
 RISCVTTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
   // FIXME: This is a property of the default vector convention, not
@@ -1561,6 +1538,43 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                           cast<VectorType>(ICA.getArgTypes()[0]), {}, CostKind,
                           0, cast<VectorType>(ICA.getReturnType()));
   }
+  case Intrinsic::experimental_vp_strided_load:
+  case Intrinsic::experimental_vp_strided_store: {
+    if (CostKind == TTI::TCK_CodeSize)
+      return TTI::TCC_Basic;
+
+    auto *DataTy = (ICA.getID() == Intrinsic::experimental_vp_strided_load)
+                       ? cast<VectorType>(ICA.getReturnType())
+                       : cast<VectorType>(ICA.getArgTypes()[0]);
+    Type *EltTy = DataTy->getElementType();
+
+    Align ABITyAlign = DL.getABITypeAlign(EltTy);
+
+    const IntrinsicInst *I = ICA.getInst();
+    Align Alignment;
+    if (ICA.isTypeBasedOnly())
+      Alignment = ICA.getAlign().value_or(ABITyAlign);
+    else {
+      unsigned Index =
+          (ICA.getID() == Intrinsic::experimental_vp_strided_load) ? 0 : 1;
+      Alignment = I->getParamAlign(Index).value_or(ABITyAlign);
+    }
+
+    if (!isLegalStridedLoadStore(DataTy, Alignment))
+      return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+
+    unsigned Opcode = ICA.getID() == Intrinsic::experimental_vp_strided_load
+                          ? Instruction::Load
+                          : Instruction::Store;
+    // Cost is proportional to the number of memory operations implied.  For
+    // scalable vectors, we use an estimate on that number since we don't
+    // know exactly what VL will be.
+    InstructionCost MemOpCost =
+        getMemoryOpCost(Opcode, EltTy, Alignment, 0, CostKind,
+                        {TTI::OK_AnyValue, TTI::OP_None}, I);
+    unsigned NumLoads = getEstimatedVLFor(DataTy);
+    return NumLoads * MemOpCost;
+  }
   case Intrinsic::fptoui_sat:
   case Intrinsic::fptosi_sat: {
     InstructionCost Cost = 0;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 6886e8964e29e..af456cdd41bd7 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -202,12 +202,6 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
                                 Align Alignment, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) const override;
 
-  InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
-                                         const Value *Ptr, bool VariableMask,
-                                         Align Alignment,
-                                         TTI::TargetCostKind CostKind,
-                                         const Instruction *I) const override;
-
   InstructionCost
   getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const override;
 
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4fcaf6dabb513..262201dac131a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7224,10 +7224,13 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
               VectorGEPCost;
           break;
         case LoadsState::StridedVectorize:
-          VecLdCost += TTI.getStridedMemoryOpCost(Instruction::Load, SubVecTy,
-                                                  LI0->getPointerOperand(),
-                                                  /*VariableMask=*/false,
-                                                  CommonAlignment, CostKind) +
+          VecLdCost += TTI.getIntrinsicInstrCost(
+                           {Intrinsic::experimental_vp_strided_load,
+                            SubVecTy,
+                            {},
+                            CommonAlignment,
+                            /*VariableMask=*/false},
+                           CostKind) +
                        VectorGEPCost;
           break;
         case LoadsState::CompressVectorize:
@@ -13191,9 +13194,13 @@ void BoUpSLP::transformNodes() {
                                  BaseLI->getPointerAddressSpace(), CostKind,
                                  TTI::OperandValueInfo()) +
             ::getShuffleCost(*TTI, TTI::SK_Reverse, VecTy, Mask, CostKind);
-        InstructionCost StridedCost = TTI->getStridedMemoryOpCost(
-            Instruction::Load, VecTy, BaseLI->getPointerOperand(),
-            /*VariableMask=*/false, CommonAlignment, CostKind, BaseLI);
+        InstructionCost StridedCost =
+            TTI->getIntrinsicInstrCost({Intrinsic::experimental_vp_strided_load,
+                                        VecTy,
+                                        {},
+                                        CommonAlignment,
+                                        /*VariableMask=*/false},
+                                       CostKind);
         if (StridedCost < OriginalVecCost || ForceStridedLoads) {
           // Strided load is more profitable than consecutive load + reverse -
           // transform the node to strided load.
@@ -13226,9 +13233,13 @@ void BoUpSLP::transformNodes() {
                                  BaseSI->getPointerAddressSpace(), CostKind,
                                  TTI::OperandValueInfo()) +
             ::getShuffleCost(*TTI, TTI::SK_Reverse, VecTy, Mask, CostKind);
-        InstructionCost StridedCost = TTI->getStridedMemoryOpCost(
-            Instruction::Store, VecTy, BaseSI->getPointerOperand(),
-            /*VariableMask=*/false, CommonAlignment, CostKind, BaseSI);
+        InstructionCost StridedCost = TTI->getIntrinsicInstrCost(
+            {Intrinsic::experimental_vp_strided_store,
+             Type::getVoidTy(VecTy->getContext()),
+             {VecTy},
+             CommonAlignment,
+             /*VariableMask=*/false},
+            CostKind);
         if (StridedCost < OriginalVecCost)
           // Strided store is more profitable than reverse + consecutive store -
           // transform the node to strided store.
@@ -14991,9 +15002,13 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
       case TreeEntry::StridedVectorize: {
         Align CommonAlignment =
             computeCommonAlignment<LoadInst>(UniqueValues.getArrayRef());
-        VecLdCost = TTI->getStridedMemoryOpCost(
-            Instruction::Load, VecTy, LI0->getPointerOperand(),
-            /*VariableMask=*/false, CommonAlignment, CostKind);
+        VecLdCost =
+            TTI->getIntrinsicInstrCost({Intrinsic::experimental_vp_strided_load,
+                                        VecTy,
+                                        {},
+                                        CommonAlignment,
+                                        /*VariableMask=*/false},
+                                       CostKind);
         break;
       }
       case TreeEntry::CompressVectorize: {
@@ -15084,9 +15099,13 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
       if (E->State == TreeEntry::StridedVectorize) {
         Align CommonAlignment =
             computeCommonAlignment<StoreInst>(UniqueValues.getArrayRef());
-        VecStCost = TTI->getStridedMemoryOpCost(
-            Instruction::Store, VecTy, BaseSI->getPointerOperand(),
-            /*VariableMask=*/false, CommonAlignment, CostKind);
+        VecStCost = TTI->getIntrinsicInstrCost(
+            {Intrinsic::experimental_vp_strided_store,
+             Type::getVoidTy(VecTy->getContext()),
+             {VecTy},
+             CommonAlignment,
+             /*VariableMask=*/false},
+            CostKind);
       } else {
         assert(E->State == TreeEntry::Vectorize &&
                "Expected either strided or consecutive stores.");

``````````



https://github.com/llvm/llvm-project/pull/165532

