[llvm] [WIP][TTI] Replace getStridedMemoryOpCost with getIntrinsicInstrCost (PR #165532)
Shih-Po Hung via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 29 02:43:07 PDT 2025
https://github.com/arcbbb created https://github.com/llvm/llvm-project/pull/165532
In #160470 there is a discussion about exploring a general approach for handling memory intrinsics.
This patch adds an alignment field to IntrinsicCostAttributes so that the type-based cost queries issued by SLP and the Loop Vectorizer (before any IR is materialized) can carry alignment information.
Candidate APIs to migrate to this scheme are getMaskedMemoryOpCost, getGatherScatterOpCost, getExpandCompressMemoryOpCost, and getStridedMemoryOpCost; this patch starts with getStridedMemoryOpCost.
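For illustration, a minimal sketch of how a type-based caller such as SLP would query a strided-load cost through the generic interface after this change. The helper name and surrounding setup are hypothetical; only the IntrinsicCostAttributes(Id, RTy, Tys, Alignment, VariableMask) constructor and the getIntrinsicInstrCost call correspond to this patch:

// Sketch only: getStridedLoadCost is a hypothetical helper; the
// five-argument IntrinsicCostAttributes constructor is the one added here.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;

static InstructionCost
getStridedLoadCost(const TargetTransformInfo &TTI, VectorType *VecTy,
                   Align CommonAlignment,
                   TargetTransformInfo::TargetCostKind CostKind) {
  // No IR instruction exists yet, so alignment and mask variability
  // travel in the attributes instead of being read off a call site.
  IntrinsicCostAttributes Attrs(Intrinsic::experimental_vp_strided_load,
                                /*RTy=*/VecTy, /*Tys=*/{}, CommonAlignment,
                                /*VariableMask=*/false);
  return TTI.getIntrinsicInstrCost(Attrs, CostKind);
}

For a strided store the return type would be void and the data type would go in Tys instead, mirroring the SLPVectorizer changes in the diff below.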
From 755138334d5f3471832e9db1092d03408dedb90b Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Tue, 28 Oct 2025 20:05:34 -0700
Subject: [PATCH] [TTI] Replace getStridedMemoryOpCost with
getIntrinsicInstrCost
---
.../llvm/Analysis/TargetTransformInfo.h | 23 +++-----
.../llvm/Analysis/TargetTransformInfoImpl.h | 8 ---
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 45 ++++++--------
llvm/lib/Analysis/TargetTransformInfo.cpp | 17 +++---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 59 +++++++++++--------
.../Target/RISCV/RISCVTargetTransformInfo.h | 6 --
.../Transforms/Vectorize/SLPVectorizer.cpp | 51 +++++++++++-----
7 files changed, 106 insertions(+), 103 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 7b7dc1b46dd80..0270a65eac776 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -135,6 +135,9 @@ class IntrinsicCostAttributes {
InstructionCost ScalarizationCost = InstructionCost::getInvalid();
TargetLibraryInfo const *LibInfo = nullptr;
+ MaybeAlign Alignment;
+ bool VariableMask = false;
+
public:
LLVM_ABI IntrinsicCostAttributes(
Intrinsic::ID Id, const CallBase &CI,
@@ -146,6 +149,10 @@ class IntrinsicCostAttributes {
FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr,
InstructionCost ScalarCost = InstructionCost::getInvalid());
+ LLVM_ABI IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
+ ArrayRef<Type *> Tys, Align Alignment,
+ bool VariableMask = false);
+
LLVM_ABI IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
ArrayRef<const Value *> Args);
@@ -160,6 +167,8 @@ class IntrinsicCostAttributes {
const IntrinsicInst *getInst() const { return II; }
Type *getReturnType() const { return RetTy; }
FastMathFlags getFlags() const { return FMF; }
+ MaybeAlign getAlign() const { return Alignment; }
+ bool getVariableMask() const { return VariableMask; }
InstructionCost getScalarizationCost() const { return ScalarizationCost; }
const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }
const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
@@ -1586,20 +1595,6 @@ class TargetTransformInfo {
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
const Instruction *I = nullptr) const;
- /// \return The cost of strided memory operations.
- /// \p Opcode - is a type of memory access Load or Store
- /// \p DataTy - a vector type of the data to be loaded or stored
- /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
- /// \p VariableMask - true when the memory access is predicated with a mask
- /// that is not a compile-time constant
- /// \p Alignment - alignment of single element
- /// \p I - the optional original context instruction, if one exists, e.g. the
- /// load/store to transform or the call to the gather/scatter intrinsic
- LLVM_ABI InstructionCost getStridedMemoryOpCost(
- unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
- Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
- const Instruction *I = nullptr) const;
-
/// \return The cost of the interleaved memory operation.
/// \p Opcode is the memory operation code
/// \p VecTy is the vector type of the interleaved access.
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 4cd607c0d0c8d..a9591704c9d14 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -862,14 +862,6 @@ class TargetTransformInfoImplBase {
return 1;
}
- virtual InstructionCost
- getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
- bool VariableMask, Align Alignment,
- TTI::TargetCostKind CostKind,
- const Instruction *I = nullptr) const {
- return InstructionCost::getInvalid();
- }
-
virtual InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 76b6c8ec68c72..fbd481d1c794f 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1574,18 +1574,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
/*IsGatherScatter*/ true, CostKind);
}
- InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
- const Value *Ptr, bool VariableMask,
- Align Alignment,
- TTI::TargetCostKind CostKind,
- const Instruction *I) const override {
- // For a target without strided memory operations (or for an illegal
- // operation type on one which does), assume we lower to a gather/scatter
- // operation. (Which may in turn be scalarized.)
- return thisT()->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
- Alignment, CostKind, I);
- }
-
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
@@ -1958,27 +1946,26 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
}
case Intrinsic::experimental_vp_strided_store: {
const Value *Data = Args[0];
- const Value *Ptr = Args[1];
const Value *Mask = Args[3];
const Value *EVL = Args[4];
bool VarMask = !isa<Constant>(Mask) || !isa<Constant>(EVL);
Type *EltTy = cast<VectorType>(Data->getType())->getElementType();
Align Alignment =
I->getParamAlign(1).value_or(thisT()->DL.getABITypeAlign(EltTy));
- return thisT()->getStridedMemoryOpCost(Instruction::Store,
- Data->getType(), Ptr, VarMask,
- Alignment, CostKind, I);
+ return thisT()->getCommonMaskedMemoryOpCost(
+ Instruction::Store, Data->getType(), Alignment, VarMask,
+ /*IsGatherScatter*/ true, CostKind);
}
case Intrinsic::experimental_vp_strided_load: {
- const Value *Ptr = Args[0];
const Value *Mask = Args[2];
const Value *EVL = Args[3];
bool VarMask = !isa<Constant>(Mask) || !isa<Constant>(EVL);
Type *EltTy = cast<VectorType>(RetTy)->getElementType();
Align Alignment =
I->getParamAlign(0).value_or(thisT()->DL.getABITypeAlign(EltTy));
- return thisT()->getStridedMemoryOpCost(Instruction::Load, RetTy, Ptr,
- VarMask, Alignment, CostKind, I);
+ return thisT()->getCommonMaskedMemoryOpCost(
+ Instruction::Load, RetTy, Alignment, VarMask,
+ /*IsGatherScatter*/ true, CostKind);
}
case Intrinsic::stepvector: {
if (isa<ScalableVectorType>(RetTy))
@@ -2418,17 +2405,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
}
case Intrinsic::experimental_vp_strided_store: {
auto *Ty = cast<VectorType>(ICA.getArgTypes()[0]);
- Align Alignment = thisT()->DL.getABITypeAlign(Ty->getElementType());
- return thisT()->getStridedMemoryOpCost(
- Instruction::Store, Ty, /*Ptr=*/nullptr, /*VariableMask=*/true,
- Alignment, CostKind, ICA.getInst());
+ Align Alignment = ICA.getAlign().value_or(
+ thisT()->DL.getABITypeAlign(Ty->getElementType()));
+ return thisT()->getCommonMaskedMemoryOpCost(
+ Instruction::Store, Ty, Alignment,
+ /*VariableMask=*/true,
+ /*IsGatherScatter*/ true, CostKind);
}
case Intrinsic::experimental_vp_strided_load: {
auto *Ty = cast<VectorType>(ICA.getReturnType());
- Align Alignment = thisT()->DL.getABITypeAlign(Ty->getElementType());
- return thisT()->getStridedMemoryOpCost(
- Instruction::Load, Ty, /*Ptr=*/nullptr, /*VariableMask=*/true,
- Alignment, CostKind, ICA.getInst());
+ Align Alignment = ICA.getAlign().value_or(
+ thisT()->DL.getABITypeAlign(Ty->getElementType()));
+ return thisT()->getCommonMaskedMemoryOpCost(
+ Instruction::Load, Ty, Alignment,
+ /*VariableMask=*/true,
+ /*IsGatherScatter*/ true, CostKind);
}
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_mul:
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index c47a1c1b23a37..821017e985cc2 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -96,6 +96,14 @@ IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
}
+IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
+ ArrayRef<Type *> Tys,
+ Align Alignment,
+ bool VariableMask)
+ : RetTy(RTy), IID(Id), Alignment(Alignment), VariableMask(VariableMask) {
+ ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
+}
+
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty,
ArrayRef<const Value *> Args)
: RetTy(Ty), IID(Id) {
@@ -1210,15 +1218,6 @@ InstructionCost TargetTransformInfo::getExpandCompressMemoryOpCost(
return Cost;
}
-InstructionCost TargetTransformInfo::getStridedMemoryOpCost(
- unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
- Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
- InstructionCost Cost = TTIImpl->getStridedMemoryOpCost(
- Opcode, DataTy, Ptr, VariableMask, Alignment, CostKind, I);
- assert(Cost >= 0 && "TTI should not produce negative costs!");
- return Cost;
-}
-
InstructionCost TargetTransformInfo::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 7bc0b5b394828..e794281b01fb3 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1172,29 +1172,6 @@ InstructionCost RISCVTTIImpl::getExpandCompressMemoryOpCost(
LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
}
-InstructionCost RISCVTTIImpl::getStridedMemoryOpCost(
- unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
- Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
- if (((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
- !isLegalStridedLoadStore(DataTy, Alignment)) ||
- (Opcode != Instruction::Load && Opcode != Instruction::Store))
- return BaseT::getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
- Alignment, CostKind, I);
-
- if (CostKind == TTI::TCK_CodeSize)
- return TTI::TCC_Basic;
-
- // Cost is proportional to the number of memory operations implied. For
- // scalable vectors, we use an estimate on that number since we don't
- // know exactly what VL will be.
- auto &VTy = *cast<VectorType>(DataTy);
- InstructionCost MemOpCost =
- getMemoryOpCost(Opcode, VTy.getElementType(), Alignment, 0, CostKind,
- {TTI::OK_AnyValue, TTI::OP_None}, I);
- unsigned NumLoads = getEstimatedVLFor(&VTy);
- return NumLoads * MemOpCost;
-}
-
InstructionCost
RISCVTTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
// FIXME: This is a property of the default vector convention, not
@@ -1561,6 +1538,42 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
cast<VectorType>(ICA.getArgTypes()[0]), {}, CostKind,
0, cast<VectorType>(ICA.getReturnType()));
}
+ case Intrinsic::experimental_vp_strided_load:
+ case Intrinsic::experimental_vp_strided_store: {
+ if (CostKind == TTI::TCK_CodeSize)
+ return TTI::TCC_Basic;
+
+ auto *DataTy = (ICA.getID() == Intrinsic::experimental_vp_strided_load)
+ ? cast<VectorType>(ICA.getReturnType())
+ : cast<VectorType>(ICA.getArgTypes()[0]);
+ Type *EltTy = DataTy->getElementType();
+
+ Align ABITyAlign = DL.getABITypeAlign(EltTy);
+
+ const IntrinsicInst *I = ICA.getInst();
+ Align Alignment;
+ if (ICA.isTypeBasedOnly())
+ Alignment = ICA.getAlign().value_or(ABITyAlign);
+ else {
+ unsigned Index = (ICA.getID() == Intrinsic::experimental_vp_strided_load) ? 0 : 1;
+ Alignment = I->getParamAlign(Index).value_or(ABITyAlign);
+ }
+
+ if (!isLegalStridedLoadStore(DataTy, Alignment))
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+
+ unsigned Opcode = ICA.getID() == Intrinsic::experimental_vp_strided_load
+ ? Instruction::Load
+ : Instruction::Store;
+ // Cost is proportional to the number of memory operations implied. For
+ // scalable vectors, we use an estimate on that number since we don't
+ // know exactly what VL will be.
+ InstructionCost MemOpCost =
+ getMemoryOpCost(Opcode, EltTy, Alignment, 0, CostKind,
+ {TTI::OK_AnyValue, TTI::OP_None}, I);
+ unsigned NumLoads = getEstimatedVLFor(DataTy);
+ return NumLoads * MemOpCost;
+ }
case Intrinsic::fptoui_sat:
case Intrinsic::fptosi_sat: {
InstructionCost Cost = 0;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 6886e8964e29e..af456cdd41bd7 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -202,12 +202,6 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
Align Alignment, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) const override;
- InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
- const Value *Ptr, bool VariableMask,
- Align Alignment,
- TTI::TargetCostKind CostKind,
- const Instruction *I) const override;
-
InstructionCost
getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const override;
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4fcaf6dabb513..262201dac131a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7224,10 +7224,13 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
VectorGEPCost;
break;
case LoadsState::StridedVectorize:
- VecLdCost += TTI.getStridedMemoryOpCost(Instruction::Load, SubVecTy,
- LI0->getPointerOperand(),
- /*VariableMask=*/false,
- CommonAlignment, CostKind) +
+ VecLdCost += TTI.getIntrinsicInstrCost(
+ {Intrinsic::experimental_vp_strided_load,
+ SubVecTy,
+ {},
+ CommonAlignment,
+ /*VariableMask=*/false},
+ CostKind) +
VectorGEPCost;
break;
case LoadsState::CompressVectorize:
@@ -13191,9 +13194,13 @@ void BoUpSLP::transformNodes() {
BaseLI->getPointerAddressSpace(), CostKind,
TTI::OperandValueInfo()) +
::getShuffleCost(*TTI, TTI::SK_Reverse, VecTy, Mask, CostKind);
- InstructionCost StridedCost = TTI->getStridedMemoryOpCost(
- Instruction::Load, VecTy, BaseLI->getPointerOperand(),
- /*VariableMask=*/false, CommonAlignment, CostKind, BaseLI);
+ InstructionCost StridedCost =
+ TTI->getIntrinsicInstrCost({Intrinsic::experimental_vp_strided_load,
+ VecTy,
+ {},
+ CommonAlignment,
+ /*VariableMask=*/false},
+ CostKind);
if (StridedCost < OriginalVecCost || ForceStridedLoads) {
// Strided load is more profitable than consecutive load + reverse -
// transform the node to strided load.
@@ -13226,9 +13233,13 @@ void BoUpSLP::transformNodes() {
BaseSI->getPointerAddressSpace(), CostKind,
TTI::OperandValueInfo()) +
::getShuffleCost(*TTI, TTI::SK_Reverse, VecTy, Mask, CostKind);
- InstructionCost StridedCost = TTI->getStridedMemoryOpCost(
- Instruction::Store, VecTy, BaseSI->getPointerOperand(),
- /*VariableMask=*/false, CommonAlignment, CostKind, BaseSI);
+ InstructionCost StridedCost = TTI->getIntrinsicInstrCost(
+ {Intrinsic::experimental_vp_strided_store,
+ Type::getVoidTy(VecTy->getContext()),
+ {VecTy},
+ CommonAlignment,
+ /*VariableMask=*/false},
+ CostKind);
if (StridedCost < OriginalVecCost)
// Strided store is more profitable than reverse + consecutive store -
// transform the node to strided store.
@@ -14991,9 +15002,13 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
case TreeEntry::StridedVectorize: {
Align CommonAlignment =
computeCommonAlignment<LoadInst>(UniqueValues.getArrayRef());
- VecLdCost = TTI->getStridedMemoryOpCost(
- Instruction::Load, VecTy, LI0->getPointerOperand(),
- /*VariableMask=*/false, CommonAlignment, CostKind);
+ VecLdCost =
+ TTI->getIntrinsicInstrCost({Intrinsic::experimental_vp_strided_load,
+ VecTy,
+ {},
+ CommonAlignment,
+ /*VariableMask=*/false},
+ CostKind);
break;
}
case TreeEntry::CompressVectorize: {
@@ -15084,9 +15099,13 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
if (E->State == TreeEntry::StridedVectorize) {
Align CommonAlignment =
computeCommonAlignment<StoreInst>(UniqueValues.getArrayRef());
- VecStCost = TTI->getStridedMemoryOpCost(
- Instruction::Store, VecTy, BaseSI->getPointerOperand(),
- /*VariableMask=*/false, CommonAlignment, CostKind);
+ VecStCost = TTI->getIntrinsicInstrCost(
+ {Intrinsic::experimental_vp_strided_store,
+ Type::getVoidTy(VecTy->getContext()),
+ {VecTy},
+ CommonAlignment,
+ /*VariableMask=*/false},
+ CostKind);
} else {
assert(E->State == TreeEntry::Vectorize &&
"Expected either strided or consecutive stores.");