[llvm] [WIP][TTI] Replace getStridedMemoryOpCost with getIntrinsicInstrCost (PR #165532)
Shih-Po Hung via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 4 01:22:27 PST 2025
https://github.com/arcbbb updated https://github.com/llvm/llvm-project/pull/165532
>From b555dd9b958c76c182e4a22483b0fa40dde3bf41 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Thu, 30 Oct 2025 18:54:01 -0700
Subject: [PATCH 1/6] Add getMemIntrinsicInstrCost
---
llvm/include/llvm/Analysis/TargetTransformInfo.h | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 7b7dc1b46dd80..cfb181340eb72 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1678,6 +1678,20 @@ class TargetTransformInfo {
LLVM_ABI InstructionCost getIntrinsicInstrCost(
const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const;
+ /// \returns The cost of memory intrinsic instructions.
+ /// It is used when the IntrinsicInst is not materialized.
+ /// \p DataTy - a vector type of the data to be loaded or stored
+ /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
+ /// \p VariableMask - true when the memory access is predicated with a mask
+ /// that is not a compile-time constant
+ /// \p Alignment - alignment of single element
+ /// \p I - the optional original context instruction, if one exists, e.g. the
+ /// load/store to transform or the call to the gather/scatter intrinsic
+ LLVM_ABI InstructionCost getMemIntrinsicInstrCost(
+ Intrinsic::ID Id, Type *DataTy, const Value *Ptr, bool VariableMask,
+ Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ const Instruction *I = nullptr) const;
+
/// \returns The cost of Call instructions.
LLVM_ABI InstructionCost getCallInstrCost(
Function *F, Type *RetTy, ArrayRef<Type *> Tys,
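For context, a caller-side sketch of the new hook (illustrative only, not part of the patch; `TTI`, `VecTy`, and `Ptr` stand in for whatever a cost-model call site has in scope, and the alignment is an assumed example value):

    // Query the cost of a masked gather before any IntrinsicInst exists.
    // CostKind defaults to TTI::TCK_RecipThroughput and I to nullptr.
    InstructionCost Cost = TTI.getMemIntrinsicInstrCost(
        Intrinsic::masked_gather, VecTy, Ptr,
        /*VariableMask=*/true, Align(4));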
>From 5937dbfb5b9c1bfda6d5b8e4ab8f35acbac1664d Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Mon, 3 Nov 2025 01:02:53 -0800
Subject: [PATCH 2/6] [TTI] Remove getStridedMemoryOpCost from
Analysis/TargetTransformInfo
---
.../llvm/Analysis/TargetTransformInfo.h | 14 -------
.../llvm/Analysis/TargetTransformInfoImpl.h | 7 ++++
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 41 ++++++++++++++-----
llvm/lib/Analysis/TargetTransformInfo.cpp | 18 ++++----
.../Transforms/Vectorize/SLPVectorizer.cpp | 28 +++++++------
5 files changed, 62 insertions(+), 46 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index cfb181340eb72..0676fad465c2d 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1586,20 +1586,6 @@ class TargetTransformInfo {
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
const Instruction *I = nullptr) const;
- /// \return The cost of strided memory operations.
- /// \p Opcode - is a type of memory access Load or Store
- /// \p DataTy - a vector type of the data to be loaded or stored
- /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
- /// \p VariableMask - true when the memory access is predicated with a mask
- /// that is not a compile-time constant
- /// \p Alignment - alignment of single element
- /// \p I - the optional original context instruction, if one exists, e.g. the
- /// load/store to transform or the call to the gather/scatter intrinsic
- LLVM_ABI InstructionCost getStridedMemoryOpCost(
- unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
- Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
- const Instruction *I = nullptr) const;
-
/// \return The cost of the interleaved memory operation.
/// \p Opcode is the memory operation code
/// \p VecTy is the vector type of the interleaved access.
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 4cd607c0d0c8d..2934e6812def2 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -926,6 +926,13 @@ class TargetTransformInfoImplBase {
return 1;
}
+ virtual InstructionCost
+ getMemIntrinsicInstrCost(Intrinsic::ID Id, Type *DataTy, const Value *Ptr,
+ bool VariableMask, Align Alignment,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr) const {
+ return 1;
+ }
virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy,
ArrayRef<Type *> Tys,
TTI::TargetCostKind CostKind) const {
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 76b6c8ec68c72..d501ccc97dac0 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1965,9 +1965,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
Type *EltTy = cast<VectorType>(Data->getType())->getElementType();
Align Alignment =
I->getParamAlign(1).value_or(thisT()->DL.getABITypeAlign(EltTy));
- return thisT()->getStridedMemoryOpCost(Instruction::Store,
- Data->getType(), Ptr, VarMask,
- Alignment, CostKind, I);
+ return thisT()->getMemIntrinsicInstrCost(IID, Data->getType(), Ptr,
+ VarMask, Alignment, CostKind, I);
}
case Intrinsic::experimental_vp_strided_load: {
const Value *Ptr = Args[0];
@@ -1977,8 +1976,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
Type *EltTy = cast<VectorType>(RetTy)->getElementType();
Align Alignment =
I->getParamAlign(0).value_or(thisT()->DL.getABITypeAlign(EltTy));
- return thisT()->getStridedMemoryOpCost(Instruction::Load, RetTy, Ptr,
- VarMask, Alignment, CostKind, I);
+ return thisT()->getMemIntrinsicInstrCost(IID, RetTy, Ptr, VarMask,
+ Alignment, CostKind, I);
}
case Intrinsic::stepvector: {
if (isa<ScalableVectorType>(RetTy))
@@ -2419,16 +2418,16 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
case Intrinsic::experimental_vp_strided_store: {
auto *Ty = cast<VectorType>(ICA.getArgTypes()[0]);
Align Alignment = thisT()->DL.getABITypeAlign(Ty->getElementType());
- return thisT()->getStridedMemoryOpCost(
- Instruction::Store, Ty, /*Ptr=*/nullptr, /*VariableMask=*/true,
- Alignment, CostKind, ICA.getInst());
+ return thisT()->getMemIntrinsicInstrCost(IID, Ty, /*Ptr=*/nullptr,
+ /*VariableMask=*/true, Alignment,
+ CostKind, ICA.getInst());
}
case Intrinsic::experimental_vp_strided_load: {
auto *Ty = cast<VectorType>(ICA.getReturnType());
Align Alignment = thisT()->DL.getABITypeAlign(Ty->getElementType());
- return thisT()->getStridedMemoryOpCost(
- Instruction::Load, Ty, /*Ptr=*/nullptr, /*VariableMask=*/true,
- Alignment, CostKind, ICA.getInst());
+ return thisT()->getMemIntrinsicInstrCost(IID, Ty, /*Ptr=*/nullptr,
+ /*VariableMask=*/true, Alignment,
+ CostKind, ICA.getInst());
}
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_mul:
@@ -3016,6 +3015,26 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return SingleCallCost;
}
+ /// Get memory intrinsic cost based on arguments.
+ InstructionCost getMemIntrinsicInstrCost(Intrinsic::ID Id, Type *DataTy,
+ const Value *Ptr, bool VariableMask,
+ Align Alignment,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) const {
+ switch (Id) {
+ case Intrinsic::experimental_vp_strided_load:
+ case Intrinsic::experimental_vp_strided_store: {
+ unsigned Opcode = (Id == Intrinsic::experimental_vp_strided_load)
+ ? Instruction::Load
+ : Instruction::Store;
+ return thisT()->getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
+ Alignment, CostKind, I);
+ }
+ default:
+ llvm_unreachable("unexpected intrinsic");
+ }
+ }
+
/// Compute a cost of the given call instruction.
///
/// Compute the cost of calling function F with return type RetTy and
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index c47a1c1b23a37..b20ce6779a2b8 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1210,15 +1210,6 @@ InstructionCost TargetTransformInfo::getExpandCompressMemoryOpCost(
return Cost;
}
-InstructionCost TargetTransformInfo::getStridedMemoryOpCost(
- unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
- Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
- InstructionCost Cost = TTIImpl->getStridedMemoryOpCost(
- Opcode, DataTy, Ptr, VariableMask, Alignment, CostKind, I);
- assert(Cost >= 0 && "TTI should not produce negative costs!");
- return Cost;
-}
-
InstructionCost TargetTransformInfo::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
@@ -1238,6 +1229,15 @@ TargetTransformInfo::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return Cost;
}
+InstructionCost TargetTransformInfo::getMemIntrinsicInstrCost(
+ Intrinsic::ID Id, Type *DataTy, const Value *Ptr, bool VariableMask,
+ Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
+ InstructionCost Cost = TTIImpl->getMemIntrinsicInstrCost(
+ Id, DataTy, Ptr, VariableMask, Alignment, CostKind, I);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
InstructionCost
TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
ArrayRef<Type *> Tys,
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4fcaf6dabb513..d5227b0e2b002 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7224,10 +7224,10 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
VectorGEPCost;
break;
case LoadsState::StridedVectorize:
- VecLdCost += TTI.getStridedMemoryOpCost(Instruction::Load, SubVecTy,
- LI0->getPointerOperand(),
- /*VariableMask=*/false,
- CommonAlignment, CostKind) +
+ VecLdCost += TTI.getMemIntrinsicInstrCost(
+ Intrinsic::experimental_vp_strided_load, SubVecTy,
+ LI0->getPointerOperand(),
+ /*VariableMask=*/false, CommonAlignment, CostKind) +
VectorGEPCost;
break;
case LoadsState::CompressVectorize:
@@ -13191,8 +13191,9 @@ void BoUpSLP::transformNodes() {
BaseLI->getPointerAddressSpace(), CostKind,
TTI::OperandValueInfo()) +
::getShuffleCost(*TTI, TTI::SK_Reverse, VecTy, Mask, CostKind);
- InstructionCost StridedCost = TTI->getStridedMemoryOpCost(
- Instruction::Load, VecTy, BaseLI->getPointerOperand(),
+ InstructionCost StridedCost = TTI->getMemIntrinsicInstrCost(
+ Intrinsic::experimental_vp_strided_load, VecTy,
+ BaseLI->getPointerOperand(),
/*VariableMask=*/false, CommonAlignment, CostKind, BaseLI);
if (StridedCost < OriginalVecCost || ForceStridedLoads) {
// Strided load is more profitable than consecutive load + reverse -
@@ -13226,8 +13227,9 @@ void BoUpSLP::transformNodes() {
BaseSI->getPointerAddressSpace(), CostKind,
TTI::OperandValueInfo()) +
::getShuffleCost(*TTI, TTI::SK_Reverse, VecTy, Mask, CostKind);
- InstructionCost StridedCost = TTI->getStridedMemoryOpCost(
- Instruction::Store, VecTy, BaseSI->getPointerOperand(),
+ InstructionCost StridedCost = TTI->getMemIntrinsicInstrCost(
+ Intrinsic::experimental_vp_strided_store, VecTy,
+ BaseSI->getPointerOperand(),
/*VariableMask=*/false, CommonAlignment, CostKind, BaseSI);
if (StridedCost < OriginalVecCost)
// Strided store is more profitable than reverse + consecutive store -
@@ -14991,8 +14993,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
case TreeEntry::StridedVectorize: {
Align CommonAlignment =
computeCommonAlignment<LoadInst>(UniqueValues.getArrayRef());
- VecLdCost = TTI->getStridedMemoryOpCost(
- Instruction::Load, VecTy, LI0->getPointerOperand(),
+ VecLdCost = TTI->getMemIntrinsicInstrCost(
+ Intrinsic::experimental_vp_strided_load, VecTy,
+ LI0->getPointerOperand(),
/*VariableMask=*/false, CommonAlignment, CostKind);
break;
}
@@ -15084,8 +15087,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
if (E->State == TreeEntry::StridedVectorize) {
Align CommonAlignment =
computeCommonAlignment<StoreInst>(UniqueValues.getArrayRef());
- VecStCost = TTI->getStridedMemoryOpCost(
- Instruction::Store, VecTy, BaseSI->getPointerOperand(),
+ VecStCost = TTI->getMemIntrinsicInstrCost(
+ Intrinsic::experimental_vp_strided_store, VecTy,
+ BaseSI->getPointerOperand(),
/*VariableMask=*/false, CommonAlignment, CostKind);
} else {
assert(E->State == TreeEntry::Vectorize &&
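At the call sites the change is mechanical; a condensed before/after sketch, with `VecTy`, `Ptr`, `CommonAlignment`, and `CostKind` as at the existing SLPVectorizer call sites:

    // Before: an opcode selects the operation.
    TTI.getStridedMemoryOpCost(Instruction::Load, VecTy, Ptr,
                               /*VariableMask=*/false, CommonAlignment,
                               CostKind);
    // After: the intrinsic ID selects it; the BasicTTIImpl fallback maps the
    // ID back to Instruction::Load/Store and forwards to
    // getStridedMemoryOpCost.
    TTI.getMemIntrinsicInstrCost(Intrinsic::experimental_vp_strided_load,
                                 VecTy, Ptr, /*VariableMask=*/false,
                                 CommonAlignment, CostKind);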
>From 0a7dacc646c10b380149a6d79bc6db47a1b85d69 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Mon, 3 Nov 2025 01:46:36 -0800
Subject: [PATCH 3/6] [TTI] Remove getGatherScatterOpCost from
Analysis/TargetTransformInfo
---
.../llvm/Analysis/TargetTransformInfo.h | 14 --------
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 34 +++++++++++++------
llvm/lib/Analysis/TargetTransformInfo.cpp | 10 ------
.../Transforms/Vectorize/LoopVectorize.cpp | 9 +++--
.../Transforms/Vectorize/SLPVectorizer.cpp | 21 ++++++------
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 8 +++--
6 files changed, 46 insertions(+), 50 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 0676fad465c2d..cbbfbec16c1e0 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1559,20 +1559,6 @@ class TargetTransformInfo {
unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
- /// \return The cost of Gather or Scatter operation
- /// \p Opcode - is a type of memory access Load or Store
- /// \p DataTy - a vector type of the data to be loaded or stored
- /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
- /// \p VariableMask - true when the memory access is predicated with a mask
- /// that is not a compile-time constant
- /// \p Alignment - alignment of single element
- /// \p I - the optional original context instruction, if one exists, e.g. the
- /// load/store to transform or the call to the gather/scatter intrinsic
- LLVM_ABI InstructionCost getGatherScatterOpCost(
- unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
- Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
- const Instruction *I = nullptr) const;
-
/// \return The cost of Expand Load or Compress Store operation
/// \p Opcode - is a type of memory access Load or Store
/// \p Src - a vector type of the data to be loaded or stored
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index d501ccc97dac0..b7d01d4d9c313 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1807,9 +1807,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
Alignment = VPI->getPointerAlignment().valueOrOne();
bool VarMask = isa<Constant>(ICA.getArgs()[2]);
- return thisT()->getGatherScatterOpCost(
- Instruction::Store, ICA.getArgTypes()[0], ICA.getArgs()[1], VarMask,
- Alignment, CostKind, nullptr);
+ return thisT()->getMemIntrinsicInstrCost(
+ Intrinsic::vp_scatter, ICA.getArgTypes()[0], ICA.getArgs()[1],
+ VarMask, Alignment, CostKind, nullptr);
}
if (ICA.getID() == Intrinsic::vp_gather) {
if (ICA.isTypeBasedOnly()) {
@@ -1823,9 +1823,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
Alignment = VPI->getPointerAlignment().valueOrOne();
bool VarMask = isa<Constant>(ICA.getArgs()[1]);
- return thisT()->getGatherScatterOpCost(
- Instruction::Load, ICA.getReturnType(), ICA.getArgs()[0], VarMask,
- Alignment, CostKind, nullptr);
+ return thisT()->getMemIntrinsicInstrCost(
+ Intrinsic::vp_gather, ICA.getReturnType(), ICA.getArgs()[0],
+ VarMask, Alignment, CostKind, nullptr);
}
if (ICA.getID() == Intrinsic::vp_select ||
@@ -1930,16 +1930,17 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
const Value *Mask = Args[2];
bool VarMask = !isa<Constant>(Mask);
Align Alignment = I->getParamAlign(1).valueOrOne();
- return thisT()->getGatherScatterOpCost(Instruction::Store,
- ICA.getArgTypes()[0], Args[1],
- VarMask, Alignment, CostKind, I);
+ return thisT()->getMemIntrinsicInstrCost(Intrinsic::masked_scatter,
+ ICA.getArgTypes()[0], Args[1],
+ VarMask, Alignment, CostKind, I);
}
case Intrinsic::masked_gather: {
const Value *Mask = Args[1];
bool VarMask = !isa<Constant>(Mask);
Align Alignment = I->getParamAlign(0).valueOrOne();
- return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
- VarMask, Alignment, CostKind, I);
+ return thisT()->getMemIntrinsicInstrCost(Intrinsic::masked_gather, RetTy,
+ Args[0], VarMask, Alignment,
+ CostKind, I);
}
case Intrinsic::masked_compressstore: {
const Value *Data = Args[0];
@@ -3030,6 +3031,17 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return thisT()->getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
Alignment, CostKind, I);
}
+ case Intrinsic::masked_scatter:
+ case Intrinsic::masked_gather:
+ case Intrinsic::vp_scatter:
+ case Intrinsic::vp_gather: {
+ unsigned Opcode =
+ ((Id == Intrinsic::masked_gather) || (Id == Intrinsic::vp_gather))
+ ? Instruction::Load
+ : Instruction::Store;
+ return thisT()->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
+ Alignment, CostKind, I);
+ }
default:
llvm_unreachable("unexpected intrinsic");
}
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index b20ce6779a2b8..94a7e0ea03437 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1191,16 +1191,6 @@ InstructionCost TargetTransformInfo::getMaskedMemoryOpCost(
return Cost;
}
-InstructionCost TargetTransformInfo::getGatherScatterOpCost(
- unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
- Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
- InstructionCost Cost = TTIImpl->getGatherScatterOpCost(
- Opcode, DataTy, Ptr, VariableMask, Alignment, CostKind, I);
- assert((!Cost.isValid() || Cost >= 0) &&
- "TTI should not produce negative costs!");
- return Cost;
-}
-
InstructionCost TargetTransformInfo::getExpandCompressMemoryOpCost(
unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
TTI::TargetCostKind CostKind, const Instruction *I) const {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index facb0fabdf57e..6d68130b48c2d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5257,10 +5257,13 @@ LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
if (!Legal->isUniform(Ptr, VF))
PtrTy = toVectorTy(PtrTy, VF);
+ Intrinsic::ID IID = (I->getOpcode() == Instruction::Load)
+ ? Intrinsic::masked_gather
+ : Intrinsic::masked_scatter;
return TTI.getAddressComputationCost(PtrTy, nullptr, nullptr, CostKind) +
- TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr,
- Legal->isMaskRequired(I), Alignment,
- CostKind, I);
+ TTI.getMemIntrinsicInstrCost(IID, VectorTy, Ptr,
+ Legal->isMaskRequired(I), Alignment,
+ CostKind, I);
}
InstructionCost
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d5227b0e2b002..8cdbbb3e63d5e 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7115,9 +7115,10 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
ScalarGEPCost;
// The cost of masked gather.
InstructionCost MaskedGatherCost =
- TTI.getGatherScatterOpCost(
- Instruction::Load, VecTy, cast<LoadInst>(VL0)->getPointerOperand(),
- /*VariableMask=*/false, CommonAlignment, CostKind) +
+ TTI.getMemIntrinsicInstrCost(Intrinsic::masked_gather, VecTy,
+ cast<LoadInst>(VL0)->getPointerOperand(),
+ /*VariableMask=*/false, CommonAlignment,
+ CostKind) +
(ProfitableGatherPointers ? 0 : VectorGEPCost);
InstructionCost GatherCost =
getScalarizationOverhead(TTI, ScalarTy, VecTy, DemandedElts,
@@ -7239,11 +7240,11 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
{}, CostKind);
break;
case LoadsState::ScatterVectorize:
- VecLdCost += TTI.getGatherScatterOpCost(Instruction::Load, SubVecTy,
- LI0->getPointerOperand(),
- /*VariableMask=*/false,
- CommonAlignment, CostKind) +
- VectorGEPCost;
+ VecLdCost +=
+ TTI.getMemIntrinsicInstrCost(
+ Intrinsic::masked_gather, SubVecTy, LI0->getPointerOperand(),
+ /*VariableMask=*/false, CommonAlignment, CostKind) +
+ VectorGEPCost;
break;
case LoadsState::Gather:
// Gathers are already calculated - ignore.
@@ -15045,8 +15046,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
case TreeEntry::ScatterVectorize: {
Align CommonAlignment =
computeCommonAlignment<LoadInst>(UniqueValues.getArrayRef());
- VecLdCost = TTI->getGatherScatterOpCost(
- Instruction::Load, VecTy, LI0->getPointerOperand(),
+ VecLdCost = TTI->getMemIntrinsicInstrCost(
+ Intrinsic::masked_gather, VecTy, LI0->getPointerOperand(),
/*VariableMask=*/false, CommonAlignment, CostKind);
break;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 931a5b7582c4e..ea835566a1723 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3547,10 +3547,14 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
if (!vputils::isSingleScalar(getAddr()))
PtrTy = toVectorTy(PtrTy, VF);
+ Intrinsic::ID IID = isa<VPWidenLoadRecipe>(this) ? Intrinsic::masked_gather
+ : isa<VPWidenStoreRecipe>(this) ? Intrinsic::masked_scatter
+ : isa<VPWidenLoadEVLRecipe>(this) ? Intrinsic::vp_gather
+ : Intrinsic::vp_scatter;
return Ctx.TTI.getAddressComputationCost(PtrTy, nullptr, nullptr,
Ctx.CostKind) +
- Ctx.TTI.getGatherScatterOpCost(Opcode, Ty, Ptr, IsMasked, Alignment,
- Ctx.CostKind, &Ingredient);
+ Ctx.TTI.getMemIntrinsicInstrCost(IID, Ty, Ptr, IsMasked, Alignment,
+ Ctx.CostKind, &Ingredient);
}
InstructionCost Cost = 0;
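The ID-selection ternary now repeats across LoopVectorize and VPlanRecipes; a hypothetical shared helper (not part of the patch) to illustrate the mapping that the BasicTTIImpl fallback also applies in reverse:

    // Maps a memory opcode to the gather/scatter intrinsic ID that
    // getMemIntrinsicInstrCost expects (masked variants shown; the EVL
    // recipes use vp_gather/vp_scatter instead).
    static Intrinsic::ID getGatherScatterIID(unsigned Opcode) {
      return Opcode == Instruction::Load ? Intrinsic::masked_gather
                                         : Intrinsic::masked_scatter;
    }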
>From 7609d22100d0f2c1d22bff79b9e38d761d55e2c5 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Mon, 3 Nov 2025 22:42:26 -0800
Subject: [PATCH 4/6] [TTI] Remove getMaskedMemoryOpCost from
Analysis/TargetTransformInfo
---
.../llvm/Analysis/TargetTransformInfo.h | 5 ----
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 29 ++++++++++++++-----
llvm/lib/Analysis/TargetTransformInfo.cpp | 9 ------
.../Transforms/Vectorize/LoopVectorize.cpp | 7 +++--
.../Transforms/Vectorize/SLPVectorizer.cpp | 25 ++++++++--------
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 27 ++++++++++-------
6 files changed, 56 insertions(+), 46 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index cbbfbec16c1e0..0b3c4a1a999da 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1554,11 +1554,6 @@ class TargetTransformInfo {
OperandValueInfo OpdInfo = {OK_AnyValue, OP_None},
const Instruction *I = nullptr) const;
- /// \return The cost of masked Load and Store instructions.
- LLVM_ABI InstructionCost getMaskedMemoryOpCost(
- unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
-
/// \return The cost of Expand Load or Compress Store operation
/// \p Opcode - is a type of memory access Load or Store
/// \p Src - a vector type of the data to be loaded or stored
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index b7d01d4d9c313..9df4efca50820 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1605,10 +1605,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
// Firstly, the cost of load/store operation.
InstructionCost Cost;
- if (UseMaskForCond || UseMaskForGaps)
- Cost = thisT()->getMaskedMemoryOpCost(Opcode, VecTy, Alignment,
- AddressSpace, CostKind);
- else
+ if (UseMaskForCond || UseMaskForGaps) {
+ Intrinsic::ID IID = (Opcode == Instruction::Load) ? Intrinsic::masked_load
+ : Intrinsic::masked_store;
+ Cost = thisT()->getMemIntrinsicInstrCost(
+ IID, VecTy, /*Ptr*/ nullptr, /*VariableMask*/ false, Alignment,
+ CostKind, /*Instruction*/ nullptr);
+ } else
Cost = thisT()->getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace,
CostKind);
@@ -2407,14 +2410,16 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
case Intrinsic::masked_store: {
Type *Ty = Tys[0];
Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
- return thisT()->getMaskedMemoryOpCost(Instruction::Store, Ty, TyAlign, 0,
- CostKind);
+ return thisT()->getMemIntrinsicInstrCost(
+ Intrinsic::masked_store, Ty,
+ /*Ptr*/ nullptr, /*VariableMask*/ false, TyAlign, CostKind, nullptr);
}
case Intrinsic::masked_load: {
Type *Ty = RetTy;
Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
- return thisT()->getMaskedMemoryOpCost(Instruction::Load, Ty, TyAlign, 0,
- CostKind);
+ return thisT()->getMemIntrinsicInstrCost(
+ Intrinsic::masked_load, Ty,
+ /*Ptr*/ nullptr, /*VariableMask*/ false, TyAlign, CostKind, nullptr);
}
case Intrinsic::experimental_vp_strided_store: {
auto *Ty = cast<VectorType>(ICA.getArgTypes()[0]);
@@ -3042,6 +3047,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return thisT()->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
Alignment, CostKind, I);
}
+ case Intrinsic::masked_load:
+ case Intrinsic::masked_store: {
+ unsigned Opcode = (Id == Intrinsic::masked_load) ? Instruction::Load
+ : Instruction::Store;
+ unsigned AS = Ptr ? Ptr->getType()->getPointerAddressSpace() : 0;
+ return thisT()->getMaskedMemoryOpCost(Opcode, DataTy, Alignment, AS,
+ CostKind);
+ }
default:
llvm_unreachable("unexpected intrinsic");
}
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 94a7e0ea03437..8ad85e19c5f46 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1182,15 +1182,6 @@ InstructionCost TargetTransformInfo::getMemoryOpCost(
return Cost;
}
-InstructionCost TargetTransformInfo::getMaskedMemoryOpCost(
- unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind) const {
- InstructionCost Cost = TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment,
- AddressSpace, CostKind);
- assert(Cost >= 0 && "TTI should not produce negative costs!");
- return Cost;
-}
-
InstructionCost TargetTransformInfo::getExpandCompressMemoryOpCost(
unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
TTI::TargetCostKind CostKind, const Instruction *I) const {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6d68130b48c2d..f1769f05f218f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5197,8 +5197,11 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
const Align Alignment = getLoadStoreAlignment(I);
InstructionCost Cost = 0;
if (Legal->isMaskRequired(I)) {
- Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
- CostKind);
+ Intrinsic::ID IID = (I->getOpcode() == Instruction::Load)
+ ? Intrinsic::masked_load
+ : Intrinsic::masked_store;
+ Cost += TTI.getMemIntrinsicInstrCost(
+ IID, VectorTy, Ptr, /*VariableMask*/ false, Alignment, CostKind);
} else {
TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(I->getOperand(0));
Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8cdbbb3e63d5e..32a1579072ad0 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6831,9 +6831,9 @@ static bool isMaskedLoadCompress(
ScalarLoadsCost;
InstructionCost LoadCost = 0;
if (IsMasked) {
- LoadCost =
- TTI.getMaskedMemoryOpCost(Instruction::Load, LoadVecTy, CommonAlignment,
- LI->getPointerAddressSpace(), CostKind);
+ LoadCost = TTI.getMemIntrinsicInstrCost(
+ Intrinsic::masked_load, LoadVecTy, LI->getPointerOperand(),
+ /*VariableMask*/ false, CommonAlignment, CostKind);
} else {
LoadCost =
TTI.getMemoryOpCost(Instruction::Load, LoadVecTy, CommonAlignment,
@@ -7232,12 +7232,13 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
VectorGEPCost;
break;
case LoadsState::CompressVectorize:
- VecLdCost += TTI.getMaskedMemoryOpCost(
- Instruction::Load, SubVecTy, CommonAlignment,
- LI0->getPointerAddressSpace(), CostKind) +
- VectorGEPCost +
- ::getShuffleCost(TTI, TTI::SK_PermuteSingleSrc, SubVecTy,
- {}, CostKind);
+ VecLdCost +=
+ TTI.getMemIntrinsicInstrCost(
+ Intrinsic::masked_load, SubVecTy, LI0->getPointerOperand(),
+ /*VariableMask*/ false, CommonAlignment, CostKind) +
+ VectorGEPCost +
+ ::getShuffleCost(TTI, TTI::SK_PermuteSingleSrc, SubVecTy, {},
+ CostKind);
break;
case LoadsState::ScatterVectorize:
VecLdCost +=
@@ -15027,9 +15028,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
Instruction::Load, LoadVecTy, InterleaveFactor, {},
CommonAlignment, LI0->getPointerAddressSpace(), CostKind);
} else if (IsMasked) {
- VecLdCost = TTI->getMaskedMemoryOpCost(
- Instruction::Load, LoadVecTy, CommonAlignment,
- LI0->getPointerAddressSpace(), CostKind);
+ VecLdCost = TTI->getMemIntrinsicInstrCost(
+ Intrinsic::masked_load, LoadVecTy, LI0->getPointerOperand(),
+ /*VariableMask*/ false, CommonAlignment, CostKind);
// TODO: include this cost into CommonCost.
VecLdCost += ::getShuffleCost(*TTI, TTI::SK_PermuteSingleSrc,
LoadVecTy, CompressMask, CostKind);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index ea835566a1723..06d68823c2a14 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3559,8 +3559,11 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
InstructionCost Cost = 0;
if (IsMasked) {
- Cost +=
- Ctx.TTI.getMaskedMemoryOpCost(Opcode, Ty, Alignment, AS, Ctx.CostKind);
+ Intrinsic::ID IID = isa<VPWidenLoadRecipe>(this) ? Intrinsic::masked_load
+ : Intrinsic::masked_store;
+ const Value *Ptr = getLoadStorePointerOperand(&Ingredient);
+ Cost += Ctx.TTI.getMemIntrinsicInstrCost(
+ IID, Ty, Ptr, /*VariableMask*/ false, Alignment, Ctx.CostKind);
} else {
TTI::OperandValueInfo OpInfo = Ctx.getOperandInfo(
isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(this) ? getOperand(0)
@@ -3672,16 +3675,18 @@ InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
if (!Consecutive || IsMasked)
return VPWidenMemoryRecipe::computeCost(VF, Ctx);
- // We need to use the getMaskedMemoryOpCost() instead of getMemoryOpCost()
+ // We need to use getMemIntrinsicInstrCost() instead of getMemoryOpCost()
// here because the EVL recipes using EVL to replace the tail mask. But in the
// legacy model, it will always calculate the cost of mask.
- // TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we
+ // TODO: Use getMemoryOpCost() instead of getMemIntrinsicInstrCost() when we
// don't need to compare to the legacy cost model.
Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF);
const Align Alignment = getLoadStoreAlignment(&Ingredient);
unsigned AS = getLoadStoreAddressSpace(&Ingredient);
- InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
- Instruction::Load, Ty, Alignment, AS, Ctx.CostKind);
+ const Value *Ptr = getLoadStorePointerOperand(&Ingredient);
+ InstructionCost Cost = Ctx.TTI.getMemIntrinsicInstrCost(
+ Intrinsic::masked_load, Ty, Ptr, /*VariableMask*/ false, Alignment,
+ Ctx.CostKind);
if (!Reverse)
return Cost;
@@ -3783,16 +3788,18 @@ InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF,
if (!Consecutive || IsMasked)
return VPWidenMemoryRecipe::computeCost(VF, Ctx);
- // We need to use the getMaskedMemoryOpCost() instead of getMemoryOpCost()
+ // We need to use getMemIntrinsicInstrCost() instead of getMemoryOpCost()
// here because the EVL recipes using EVL to replace the tail mask. But in the
// legacy model, it will always calculate the cost of mask.
+ // TODO: Use getMemoryOpCost() instead of getMemIntrinsicInstrCost() when we
+ // TODO: Using getMemoryOpCost() instead of getMemIntrinsicInstrCost when we
// don't need to compare to the legacy cost model.
Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF);
const Align Alignment = getLoadStoreAlignment(&Ingredient);
unsigned AS = getLoadStoreAddressSpace(&Ingredient);
- InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
- Instruction::Store, Ty, Alignment, AS, Ctx.CostKind);
+ const Value *Ptr = getLoadStorePointerOperand(&Ingredient);
+ InstructionCost Cost = Ctx.TTI.getMemIntrinsicInstrCost(
+ Intrinsic::masked_store, Ty, Ptr, /*VariableMask*/ false, Alignment,
+ Ctx.CostKind);
if (!Reverse)
return Cost;
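One semantic detail in this step: getMaskedMemoryOpCost took an explicit address space, while the new entry point takes a pointer operand. The BasicTTIImpl fallback recovers the address space as condensed below; type-based queries pass a null Ptr and fall back to address space 0:

    // From the fallback for masked_load/masked_store (condensed):
    unsigned AS = Ptr ? Ptr->getType()->getPointerAddressSpace() : 0;
    return thisT()->getMaskedMemoryOpCost(Opcode, DataTy, Alignment, AS,
                                          CostKind);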
>From 15474cd98ea41551d5f5a547acd084f091c23f91 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Mon, 3 Nov 2025 23:21:36 -0800
Subject: [PATCH 5/6] [TTI] Remove getExpandCompressMemoryOpCost from
Analysis/TargetTransformInfo
---
.../llvm/Analysis/TargetTransformInfo.h | 13 ------------
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 20 +++++++++++++------
llvm/lib/Analysis/TargetTransformInfo.cpp | 9 ---------
3 files changed, 14 insertions(+), 28 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 0b3c4a1a999da..a5462741bbd59 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1554,19 +1554,6 @@ class TargetTransformInfo {
OperandValueInfo OpdInfo = {OK_AnyValue, OP_None},
const Instruction *I = nullptr) const;
- /// \return The cost of Expand Load or Compress Store operation
- /// \p Opcode - is a type of memory access Load or Store
- /// \p Src - a vector type of the data to be loaded or stored
- /// \p VariableMask - true when the memory access is predicated with a mask
- /// that is not a compile-time constant
- /// \p Alignment - alignment of single element
- /// \p I - the optional original context instruction, if one exists, e.g. the
- /// load/store to transform or the call to the gather/scatter intrinsic
- LLVM_ABI InstructionCost getExpandCompressMemoryOpCost(
- unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
- const Instruction *I = nullptr) const;
-
/// \return The cost of the interleaved memory operation.
/// \p Opcode is the memory operation code
/// \p VecTy is the vector type of the interleaved access.
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 9df4efca50820..02215b6de3ceb 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1949,16 +1949,16 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
const Value *Data = Args[0];
const Value *Mask = Args[2];
Align Alignment = I->getParamAlign(1).valueOrOne();
- return thisT()->getExpandCompressMemoryOpCost(
- Instruction::Store, Data->getType(), !isa<Constant>(Mask), Alignment,
- CostKind, I);
+ return thisT()->getMemIntrinsicInstrCost(
+ Intrinsic::masked_compressstore, Data->getType(), Args[1],
+ !isa<Constant>(Mask), Alignment, CostKind, I);
}
case Intrinsic::masked_expandload: {
const Value *Mask = Args[1];
Align Alignment = I->getParamAlign(0).valueOrOne();
- return thisT()->getExpandCompressMemoryOpCost(Instruction::Load, RetTy,
- !isa<Constant>(Mask),
- Alignment, CostKind, I);
+ return thisT()->getMemIntrinsicInstrCost(
+ Intrinsic::masked_expandload, RetTy, Args[0], !isa<Constant>(Mask),
+ Alignment, CostKind, I);
}
case Intrinsic::experimental_vp_strided_store: {
const Value *Data = Args[0];
@@ -3055,6 +3055,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return thisT()->getMaskedMemoryOpCost(Opcode, DataTy, Alignment, AS,
CostKind);
}
+ case Intrinsic::masked_compressstore:
+ case Intrinsic::masked_expandload: {
+ unsigned Opcode = (Id == Intrinsic::masked_expandload)
+ ? Instruction::Load
+ : Instruction::Store;
+ return thisT()->getExpandCompressMemoryOpCost(
+ Opcode, DataTy, VariableMask, Alignment, CostKind, I);
+ }
default:
llvm_unreachable("unexpected intrinsic");
}
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 8ad85e19c5f46..9be796193c1cc 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1182,15 +1182,6 @@ InstructionCost TargetTransformInfo::getMemoryOpCost(
return Cost;
}
-InstructionCost TargetTransformInfo::getExpandCompressMemoryOpCost(
- unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
- TTI::TargetCostKind CostKind, const Instruction *I) const {
- InstructionCost Cost = TTIImpl->getExpandCompressMemoryOpCost(
- Opcode, DataTy, VariableMask, Alignment, CostKind, I);
- assert(Cost >= 0 && "TTI should not produce negative costs!");
- return Cost;
-}
-
InstructionCost TargetTransformInfo::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
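As before, the expand/compress substitution is mechanical; a condensed before/after sketch for masked_expandload (Ptr is Args[0] at the call site and is not consulted by the default fallback):

    // Before:
    TTI.getExpandCompressMemoryOpCost(Instruction::Load, RetTy,
                                      !isa<Constant>(Mask), Alignment,
                                      CostKind, I);
    // After:
    TTI.getMemIntrinsicInstrCost(Intrinsic::masked_expandload, RetTy, Ptr,
                                 !isa<Constant>(Mask), Alignment, CostKind,
                                 I);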
>From 2c2e772c5d6e682b2990e0ca3cac85a584820782 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Tue, 4 Nov 2025 01:21:35 -0800
Subject: [PATCH 6/6] Use true for default variable mask for masked load/store
---
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 6 +++---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +-
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 6 +++---
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 6 +++---
4 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 02215b6de3ceb..0bccf8d5347db 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1609,7 +1609,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
Intrinsic::ID IID = (Opcode == Instruction::Load) ? Intrinsic::masked_load
: Intrinsic::masked_store;
Cost = thisT()->getMemIntrinsicInstrCost(
- IID, VecTy, /*Ptr*/ nullptr, /*VariableMask*/ false, Alignment,
+ IID, VecTy, /*Ptr*/ nullptr, /*VariableMask*/ true, Alignment,
CostKind, /*Instruction*/ nullptr);
} else
Cost = thisT()->getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace,
@@ -2412,14 +2412,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
return thisT()->getMemIntrinsicInstrCost(
Intrinsic::masked_store, Ty,
- /*Ptr*/ nullptr, /*VariableMask*/ false, TyAlign, CostKind, nullptr);
+ /*Ptr*/ nullptr, /*VariableMask*/ true, TyAlign, CostKind, nullptr);
}
case Intrinsic::masked_load: {
Type *Ty = RetTy;
Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
return thisT()->getMemIntrinsicInstrCost(
Intrinsic::masked_load, Ty,
- /*Ptr*/ nullptr, /*VariableMask*/ false, TyAlign, CostKind, nullptr);
+ /*Ptr*/ nullptr, /*VariableMask*/ true, TyAlign, CostKind, nullptr);
}
case Intrinsic::experimental_vp_strided_store: {
auto *Ty = cast<VectorType>(ICA.getArgTypes()[0]);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f1769f05f218f..c5c9477b2aa48 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5201,7 +5201,7 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
? Intrinsic::masked_load
: Intrinsic::masked_store;
Cost += TTI.getMemIntrinsicInstrCost(
- IID, VectorTy, Ptr, /*VariableMask*/ false, Alignment, CostKind);
+ IID, VectorTy, Ptr, /*VariableMask*/ true, Alignment, CostKind);
} else {
TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(I->getOperand(0));
Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 32a1579072ad0..8f4cbc48f865c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6833,7 +6833,7 @@ static bool isMaskedLoadCompress(
if (IsMasked) {
LoadCost = TTI.getMemIntrinsicInstrCost(
Intrinsic::masked_load, LoadVecTy, LI->getPointerOperand(),
- /*VariableMask*/ false, CommonAlignment, CostKind);
+ /*VariableMask*/ true, CommonAlignment, CostKind);
} else {
LoadCost =
TTI.getMemoryOpCost(Instruction::Load, LoadVecTy, CommonAlignment,
@@ -7235,7 +7235,7 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
VecLdCost +=
TTI.getMemIntrinsicInstrCost(
Intrinsic::masked_load, SubVecTy, LI0->getPointerOperand(),
- /*VariableMask*/ false, CommonAlignment, CostKind) +
+ /*VariableMask*/ true, CommonAlignment, CostKind) +
VectorGEPCost +
::getShuffleCost(TTI, TTI::SK_PermuteSingleSrc, SubVecTy, {},
CostKind);
@@ -15030,7 +15030,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
} else if (IsMasked) {
VecLdCost = TTI->getMemIntrinsicInstrCost(
Intrinsic::masked_load, LoadVecTy, LI0->getPointerOperand(),
- /*VariableMask*/ false, CommonAlignment, CostKind);
+ /*VariableMask*/ true, CommonAlignment, CostKind);
// TODO: include this cost into CommonCost.
VecLdCost += ::getShuffleCost(*TTI, TTI::SK_PermuteSingleSrc,
LoadVecTy, CompressMask, CostKind);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 06d68823c2a14..ab1014ac9cc4b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3563,7 +3563,7 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
: Intrinsic::masked_store;
const Value *Ptr = getLoadStorePointerOperand(&Ingredient);
Cost += Ctx.TTI.getMemIntrinsicInstrCost(
- IID, Ty, Ptr, /*VariableMask*/ false, Alignment, Ctx.CostKind);
+ IID, Ty, Ptr, /*VariableMask*/ true, Alignment, Ctx.CostKind);
} else {
TTI::OperandValueInfo OpInfo = Ctx.getOperandInfo(
isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(this) ? getOperand(0)
@@ -3685,7 +3685,7 @@ InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
unsigned AS = getLoadStoreAddressSpace(&Ingredient);
const Value *Ptr = getLoadStorePointerOperand(&Ingredient);
InstructionCost Cost = Ctx.TTI.getMemIntrinsicInstrCost(
- Intrinsic::masked_load, Ty, Ptr, /*VariableMask*/ false, Alignment,
+ Intrinsic::masked_load, Ty, Ptr, /*VariableMask*/ true, Alignment,
Ctx.CostKind);
if (!Reverse)
return Cost;
@@ -3798,7 +3798,7 @@ InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF,
unsigned AS = getLoadStoreAddressSpace(&Ingredient);
const Value *Ptr = getLoadStorePointerOperand(&Ingredient);
InstructionCost Cost = Ctx.TTI.getMemIntrinsicInstrCost(
- Intrinsic::masked_store, Ty, Ptr, /*VariableMask*/ false, Alignment,
+ Intrinsic::masked_store, Ty, Ptr, /*VariableMask*/ true, Alignment,
Ctx.CostKind);
if (!Reverse)
return Cost;
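This last commit aligns the flag with its documented meaning: true when the mask is not a compile-time constant. The widened masked load/store sites use a loop-dependent mask, so a sketch of the intended computation (`Mask` is a hypothetical stand-in for the mask operand):

    // Per the documentation added in the first commit: VariableMask is true
    // when the mask is not a compile-time constant, which holds for these
    // widened masked load/store call sites.
    bool VariableMask = !isa<Constant>(Mask);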