[llvm] [TTI][Vectorize] Migrate masked/gather-scatter/strided/expand-compress costing (NFCI) (PR #165532)
Shih-Po Hung via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 27 20:48:27 PST 2025
https://github.com/arcbbb updated https://github.com/llvm/llvm-project/pull/165532
From bcfe3dd40d21c1f2db557a15670dd7f62e406807 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Sun, 23 Nov 2025 18:01:39 -0800
Subject: [PATCH 1/3] Add getMemIntrinsicInstrCost
---
.../llvm/Analysis/TargetTransformInfo.h | 77 +++++-------
.../llvm/Analysis/TargetTransformInfoImpl.h | 5 +
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 115 +++++++++++++-----
llvm/lib/Analysis/TargetTransformInfo.cpp | 44 ++-----
.../Transforms/Vectorize/LoopVectorize.cpp | 12 +-
.../Transforms/Vectorize/SLPVectorizer.cpp | 79 ++++++------
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 24 ++--
7 files changed, 195 insertions(+), 161 deletions(-)
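For context while reading the diffstat: the series replaces the per-flavor TTI cost hooks at call sites with a single getMemIntrinsicInstrCost entry point keyed on the intrinsic ID carried in MemIntrinsicCostAttributes. A minimal sketch of the migration pattern (not part of the patch; VecTy, Ptr, Alignment, I, and CostKind stand in for whatever the caller already has):

    // Before this series: dedicated hook per memory-intrinsic flavor.
    InstructionCost OldCost = TTI.getGatherScatterOpCost(
        Instruction::Load, VecTy, Ptr, /*VariableMask=*/true, Alignment,
        CostKind, I);

    // After this series: one entry point; the flavor is encoded as an
    // intrinsic ID inside MemIntrinsicCostAttributes.
    InstructionCost NewCost = TTI.getMemIntrinsicInstrCost(
        MemIntrinsicCostAttributes(Intrinsic::masked_gather, VecTy, Ptr,
                                   /*VariableMask=*/true, Alignment, I),
        CostKind);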
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index bd4c901e9bc82..868e06d65dee7 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -125,12 +125,23 @@ struct HardwareLoopInfo {
/// Information for memory intrinsic cost model.
class MemIntrinsicCostAttributes {
+ /// Optional context instruction, if one exists, e.g. the
+ /// load/store to transform to the intrinsic.
+ const Instruction *I = nullptr;
+
+ /// Address in memory.
+ const Value *Ptr = nullptr;
+
/// Vector type of the data to be loaded or stored.
Type *DataTy = nullptr;
/// ID of the memory intrinsic.
Intrinsic::ID IID;
+ /// True when the memory access is predicated with a mask
+ /// that is not a compile-time constant.
+ bool VariableMask = true;
+
/// Address space of the pointer.
unsigned AddressSpace = 0;
@@ -138,13 +149,27 @@ class MemIntrinsicCostAttributes {
Align Alignment;
public:
+ LLVM_ABI MemIntrinsicCostAttributes(Intrinsic::ID Id, Type *DataTy,
+ const Value *Ptr, bool VariableMask,
+ Align Alignment,
+ const Instruction *I = nullptr)
+ : I(I), Ptr(Ptr), DataTy(DataTy), IID(Id), VariableMask(VariableMask),
+ Alignment(Alignment) {}
LLVM_ABI MemIntrinsicCostAttributes(Intrinsic::ID Id, Type *DataTy,
Align Alignment, unsigned AddressSpace)
: DataTy(DataTy), IID(Id), AddressSpace(AddressSpace),
Alignment(Alignment) {}
+ LLVM_ABI MemIntrinsicCostAttributes(Intrinsic::ID Id, Type *DataTy,
+ bool VariableMask, Align Alignment,
+ const Instruction *I = nullptr)
+ : I(I), DataTy(DataTy), IID(Id), VariableMask(VariableMask),
+ Alignment(Alignment) {}
Intrinsic::ID getID() const { return IID; }
+ const Instruction *getInst() const { return I; }
+ const Value *getPointer() const { return Ptr; }
Type *getDataType() const { return DataTy; }
+ bool getVariableMask() const { return VariableMask; }
unsigned getAddressSpace() const { return AddressSpace; }
Align getAlignment() const { return Alignment; }
};
@@ -1584,52 +1609,6 @@ class TargetTransformInfo {
OperandValueInfo OpdInfo = {OK_AnyValue, OP_None},
const Instruction *I = nullptr) const;
- /// \return The cost of masked Load and Store instructions.
- LLVM_ABI InstructionCost getMaskedMemoryOpCost(
- const MemIntrinsicCostAttributes &MICA,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
-
- /// \return The cost of Gather or Scatter operation
- /// \p Opcode - is a type of memory access Load or Store
- /// \p DataTy - a vector type of the data to be loaded or stored
- /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
- /// \p VariableMask - true when the memory access is predicated with a mask
- /// that is not a compile-time constant
- /// \p Alignment - alignment of single element
- /// \p I - the optional original context instruction, if one exists, e.g. the
- /// load/store to transform or the call to the gather/scatter intrinsic
- LLVM_ABI InstructionCost getGatherScatterOpCost(
- unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
- Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
- const Instruction *I = nullptr) const;
-
- /// \return The cost of Expand Load or Compress Store operation
- /// \p Opcode - is a type of memory access Load or Store
- /// \p Src - a vector type of the data to be loaded or stored
- /// \p VariableMask - true when the memory access is predicated with a mask
- /// that is not a compile-time constant
- /// \p Alignment - alignment of single element
- /// \p I - the optional original context instruction, if one exists, e.g. the
- /// load/store to transform or the call to the gather/scatter intrinsic
- LLVM_ABI InstructionCost getExpandCompressMemoryOpCost(
- unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
- const Instruction *I = nullptr) const;
-
- /// \return The cost of strided memory operations.
- /// \p Opcode - is a type of memory access Load or Store
- /// \p DataTy - a vector type of the data to be loaded or stored
- /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
- /// \p VariableMask - true when the memory access is predicated with a mask
- /// that is not a compile-time constant
- /// \p Alignment - alignment of single element
- /// \p I - the optional original context instruction, if one exists, e.g. the
- /// load/store to transform or the call to the gather/scatter intrinsic
- LLVM_ABI InstructionCost getStridedMemoryOpCost(
- unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
- Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
- const Instruction *I = nullptr) const;
-
/// \return The cost of the interleaved memory operation.
/// \p Opcode is the memory operation code
/// \p VecTy is the vector type of the interleaved access.
@@ -1708,6 +1687,12 @@ class TargetTransformInfo {
LLVM_ABI InstructionCost getIntrinsicInstrCost(
const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const;
+ /// \returns The cost of memory intrinsic instructions.
+ /// Used when IntrinsicInst is not materialized.
+ LLVM_ABI InstructionCost
+ getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA,
+ TTI::TargetCostKind CostKind) const;
+
/// \returns The cost of Call instructions.
LLVM_ABI InstructionCost getCallInstrCost(
Function *F, Type *RetTy, ArrayRef<Type *> Tys,
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 580b219ddbe53..d83042048334d 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -927,6 +927,11 @@ class TargetTransformInfoImplBase {
return 1;
}
+ virtual InstructionCost
+ getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA,
+ TTI::TargetCostKind CostKind) const {
+ return 1;
+ }
virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy,
ArrayRef<Type *> Tys,
TTI::TargetCostKind CostKind) const {
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index cb389ae74ef46..afbfd7746679f 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1624,7 +1624,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
if (UseMaskForCond || UseMaskForGaps) {
unsigned IID = Opcode == Instruction::Load ? Intrinsic::masked_load
: Intrinsic::masked_store;
- Cost = thisT()->getMaskedMemoryOpCost(
+ Cost = thisT()->getMemIntrinsicInstrCost(
{IID, VecTy, Alignment, AddressSpace}, CostKind);
} else
Cost = thisT()->getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace,
@@ -1825,9 +1825,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
Alignment = VPI->getPointerAlignment().valueOrOne();
bool VarMask = isa<Constant>(ICA.getArgs()[2]);
- return thisT()->getGatherScatterOpCost(
- Instruction::Store, ICA.getArgTypes()[0], ICA.getArgs()[1], VarMask,
- Alignment, CostKind, nullptr);
+ return thisT()->getMemIntrinsicInstrCost(
+ {Intrinsic::vp_scatter, ICA.getArgTypes()[0], ICA.getArgs()[1],
+ VarMask, Alignment, nullptr},
+ CostKind);
}
if (ICA.getID() == Intrinsic::vp_gather) {
if (ICA.isTypeBasedOnly()) {
@@ -1841,9 +1842,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
Alignment = VPI->getPointerAlignment().valueOrOne();
bool VarMask = isa<Constant>(ICA.getArgs()[1]);
- return thisT()->getGatherScatterOpCost(
- Instruction::Load, ICA.getReturnType(), ICA.getArgs()[0], VarMask,
- Alignment, CostKind, nullptr);
+ return thisT()->getMemIntrinsicInstrCost(
+ {Intrinsic::vp_gather, ICA.getReturnType(), ICA.getArgs()[0],
+ VarMask, Alignment, nullptr},
+ CostKind);
}
if (ICA.getID() == Intrinsic::vp_select ||
@@ -1948,31 +1950,35 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
const Value *Mask = Args[2];
bool VarMask = !isa<Constant>(Mask);
Align Alignment = I->getParamAlign(1).valueOrOne();
- return thisT()->getGatherScatterOpCost(Instruction::Store,
- ICA.getArgTypes()[0], Args[1],
- VarMask, Alignment, CostKind, I);
+ return thisT()->getMemIntrinsicInstrCost({Intrinsic::masked_scatter,
+ ICA.getArgTypes()[0], Args[1],
+ VarMask, Alignment, I},
+ CostKind);
}
case Intrinsic::masked_gather: {
const Value *Mask = Args[1];
bool VarMask = !isa<Constant>(Mask);
Align Alignment = I->getParamAlign(0).valueOrOne();
- return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
- VarMask, Alignment, CostKind, I);
+ return thisT()->getMemIntrinsicInstrCost(
+ {Intrinsic::masked_gather, RetTy, Args[0], VarMask, Alignment, I},
+ CostKind);
}
case Intrinsic::masked_compressstore: {
const Value *Data = Args[0];
const Value *Mask = Args[2];
Align Alignment = I->getParamAlign(1).valueOrOne();
- return thisT()->getExpandCompressMemoryOpCost(
- Instruction::Store, Data->getType(), !isa<Constant>(Mask), Alignment,
- CostKind, I);
+ return thisT()->getMemIntrinsicInstrCost(
+ {Intrinsic::masked_compressstore, Data->getType(),
+ !isa<Constant>(Mask), Alignment, I},
+ CostKind);
}
case Intrinsic::masked_expandload: {
const Value *Mask = Args[1];
Align Alignment = I->getParamAlign(0).valueOrOne();
- return thisT()->getExpandCompressMemoryOpCost(Instruction::Load, RetTy,
- !isa<Constant>(Mask),
- Alignment, CostKind, I);
+ return thisT()->getMemIntrinsicInstrCost({Intrinsic::masked_expandload,
+ RetTy, !isa<Constant>(Mask),
+ Alignment, I},
+ CostKind);
}
case Intrinsic::experimental_vp_strided_store: {
const Value *Data = Args[0];
@@ -1983,9 +1989,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
Type *EltTy = cast<VectorType>(Data->getType())->getElementType();
Align Alignment =
I->getParamAlign(1).value_or(thisT()->DL.getABITypeAlign(EltTy));
- return thisT()->getStridedMemoryOpCost(Instruction::Store,
- Data->getType(), Ptr, VarMask,
- Alignment, CostKind, I);
+ return thisT()->getMemIntrinsicInstrCost(
+ {IID, Data->getType(), Ptr, VarMask, Alignment, I}, CostKind);
}
case Intrinsic::experimental_vp_strided_load: {
const Value *Ptr = Args[0];
@@ -1995,8 +2000,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
Type *EltTy = cast<VectorType>(RetTy)->getElementType();
Align Alignment =
I->getParamAlign(0).value_or(thisT()->DL.getABITypeAlign(EltTy));
- return thisT()->getStridedMemoryOpCost(Instruction::Load, RetTy, Ptr,
- VarMask, Alignment, CostKind, I);
+ return thisT()->getMemIntrinsicInstrCost(
+ {IID, RetTy, Ptr, VarMask, Alignment, I}, CostKind);
}
case Intrinsic::stepvector: {
if (isa<ScalableVectorType>(RetTy))
@@ -2409,26 +2414,28 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
case Intrinsic::masked_store: {
Type *Ty = Tys[0];
Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
- return thisT()->getMaskedMemoryOpCost({IID, Ty, TyAlign, 0}, CostKind);
+ return thisT()->getMemIntrinsicInstrCost({IID, Ty, TyAlign, 0}, CostKind);
}
case Intrinsic::masked_load: {
Type *Ty = RetTy;
Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
- return thisT()->getMaskedMemoryOpCost({IID, Ty, TyAlign, 0}, CostKind);
+ return thisT()->getMemIntrinsicInstrCost({IID, Ty, TyAlign, 0}, CostKind);
}
case Intrinsic::experimental_vp_strided_store: {
auto *Ty = cast<VectorType>(ICA.getArgTypes()[0]);
Align Alignment = thisT()->DL.getABITypeAlign(Ty->getElementType());
- return thisT()->getStridedMemoryOpCost(
- Instruction::Store, Ty, /*Ptr=*/nullptr, /*VariableMask=*/true,
- Alignment, CostKind, ICA.getInst());
+ return thisT()->getMemIntrinsicInstrCost({IID, Ty, /*Ptr=*/nullptr,
+ /*VariableMask=*/true,
+ Alignment, ICA.getInst()},
+ CostKind);
}
case Intrinsic::experimental_vp_strided_load: {
auto *Ty = cast<VectorType>(ICA.getReturnType());
Align Alignment = thisT()->DL.getABITypeAlign(Ty->getElementType());
- return thisT()->getStridedMemoryOpCost(
- Instruction::Load, Ty, /*Ptr=*/nullptr, /*VariableMask=*/true,
- Alignment, CostKind, ICA.getInst());
+ return thisT()->getMemIntrinsicInstrCost({IID, Ty, /*Ptr=*/nullptr,
+ /*VariableMask=*/true,
+ Alignment, ICA.getInst()},
+ CostKind);
}
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_mul:
@@ -3016,6 +3023,52 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return SingleCallCost;
}
+ /// Get memory intrinsic cost based on arguments.
+ InstructionCost
+ getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA,
+ TTI::TargetCostKind CostKind) const override {
+ unsigned Id = MICA.getID();
+ Type *DataTy = MICA.getDataType();
+ const Value *Ptr = MICA.getPointer();
+ const Instruction *I = MICA.getInst();
+ bool VariableMask = MICA.getVariableMask();
+ Align Alignment = MICA.getAlignment();
+
+ switch (Id) {
+ case Intrinsic::experimental_vp_strided_load:
+ case Intrinsic::experimental_vp_strided_store: {
+ unsigned Opcode = Id == Intrinsic::experimental_vp_strided_load
+ ? Instruction::Load
+ : Instruction::Store;
+ return thisT()->getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
+ Alignment, CostKind, I);
+ }
+ case Intrinsic::masked_scatter:
+ case Intrinsic::masked_gather:
+ case Intrinsic::vp_scatter:
+ case Intrinsic::vp_gather: {
+ unsigned Opcode =
+ (Id == Intrinsic::masked_gather || Id == Intrinsic::vp_gather)
+ ? Instruction::Load
+ : Instruction::Store;
+ return thisT()->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
+ Alignment, CostKind, I);
+ }
+ case Intrinsic::masked_load:
+ case Intrinsic::masked_store:
+ return thisT()->getMaskedMemoryOpCost(MICA, CostKind);
+ case Intrinsic::masked_compressstore:
+ case Intrinsic::masked_expandload: {
+ unsigned Opcode = Id == Intrinsic::masked_expandload ? Instruction::Load
+ : Instruction::Store;
+ return thisT()->getExpandCompressMemoryOpCost(
+ Opcode, DataTy, VariableMask, Alignment, CostKind, I);
+ }
+ default:
+ llvm_unreachable("unexpected intrinsic");
+ }
+ }
+
/// Compute a cost of the given call instruction.
///
/// Compute the cost of calling function F with return type RetTy and
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 46f90b4cec7c9..b4b45b227b3a6 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1186,42 +1186,6 @@ InstructionCost TargetTransformInfo::getMemoryOpCost(
return Cost;
}
-InstructionCost TargetTransformInfo::getMaskedMemoryOpCost(
- const MemIntrinsicCostAttributes &MICA,
- TTI::TargetCostKind CostKind) const {
- InstructionCost Cost = TTIImpl->getMaskedMemoryOpCost(MICA, CostKind);
- assert(Cost >= 0 && "TTI should not produce negative costs!");
- return Cost;
-}
-
-InstructionCost TargetTransformInfo::getGatherScatterOpCost(
- unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
- Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
- InstructionCost Cost = TTIImpl->getGatherScatterOpCost(
- Opcode, DataTy, Ptr, VariableMask, Alignment, CostKind, I);
- assert((!Cost.isValid() || Cost >= 0) &&
- "TTI should not produce negative costs!");
- return Cost;
-}
-
-InstructionCost TargetTransformInfo::getExpandCompressMemoryOpCost(
- unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
- TTI::TargetCostKind CostKind, const Instruction *I) const {
- InstructionCost Cost = TTIImpl->getExpandCompressMemoryOpCost(
- Opcode, DataTy, VariableMask, Alignment, CostKind, I);
- assert(Cost >= 0 && "TTI should not produce negative costs!");
- return Cost;
-}
-
-InstructionCost TargetTransformInfo::getStridedMemoryOpCost(
- unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
- Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
- InstructionCost Cost = TTIImpl->getStridedMemoryOpCost(
- Opcode, DataTy, Ptr, VariableMask, Alignment, CostKind, I);
- assert(Cost >= 0 && "TTI should not produce negative costs!");
- return Cost;
-}
-
InstructionCost TargetTransformInfo::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
@@ -1241,6 +1205,14 @@ TargetTransformInfo::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return Cost;
}
+InstructionCost TargetTransformInfo::getMemIntrinsicInstrCost(
+ const MemIntrinsicCostAttributes &MICA,
+ TTI::TargetCostKind CostKind) const {
+ InstructionCost Cost = TTIImpl->getMemIntrinsicInstrCost(MICA, CostKind);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
InstructionCost
TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
ArrayRef<Type *> Tys,
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 277e43a38018e..26f9ca98291c7 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5254,7 +5254,8 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
unsigned IID = I->getOpcode() == Instruction::Load
? Intrinsic::masked_load
: Intrinsic::masked_store;
- Cost += TTI.getMaskedMemoryOpCost({IID, VectorTy, Alignment, AS}, CostKind);
+ Cost +=
+ TTI.getMemIntrinsicInstrCost({IID, VectorTy, Alignment, AS}, CostKind);
} else {
TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(I->getOperand(0));
Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
@@ -5313,10 +5314,13 @@ LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
if (!Legal->isUniform(Ptr, VF))
PtrTy = toVectorTy(PtrTy, VF);
+ unsigned IID = I->getOpcode() == Instruction::Load
+ ? Intrinsic::masked_gather
+ : Intrinsic::masked_scatter;
return TTI.getAddressComputationCost(PtrTy, nullptr, nullptr, CostKind) +
- TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr,
- Legal->isMaskRequired(I), Alignment,
- CostKind, I);
+ TTI.getMemIntrinsicInstrCost(
+ {IID, VectorTy, Ptr, Legal->isMaskRequired(I), Alignment, I},
+ CostKind);
}
InstructionCost
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 3b36ccbd677dc..f53d810562352 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6900,10 +6900,10 @@ static bool isMaskedLoadCompress(
ScalarLoadsCost;
InstructionCost LoadCost = 0;
if (IsMasked) {
- LoadCost = TTI.getMaskedMemoryOpCost({Intrinsic::masked_load, LoadVecTy,
- CommonAlignment,
- LI->getPointerAddressSpace()},
- CostKind);
+ LoadCost = TTI.getMemIntrinsicInstrCost({Intrinsic::masked_load, LoadVecTy,
+ CommonAlignment,
+ LI->getPointerAddressSpace()},
+ CostKind);
} else {
LoadCost =
TTI.getMemoryOpCost(Instruction::Load, LoadVecTy, CommonAlignment,
@@ -7246,9 +7246,10 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
ScalarGEPCost;
// The cost of masked gather.
InstructionCost MaskedGatherCost =
- TTI.getGatherScatterOpCost(
- Instruction::Load, VecTy, cast<LoadInst>(VL0)->getPointerOperand(),
- /*VariableMask=*/false, CommonAlignment, CostKind) +
+ TTI.getMemIntrinsicInstrCost({Intrinsic::masked_gather, VecTy,
+ cast<LoadInst>(VL0)->getPointerOperand(),
+ /*VariableMask=*/false, CommonAlignment},
+ CostKind) +
(ProfitableGatherPointers ? 0 : VectorGEPCost);
InstructionCost GatherCost =
getScalarizationOverhead(TTI, ScalarTy, VecTy, DemandedElts,
@@ -7355,27 +7356,28 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
VectorGEPCost;
break;
case LoadsState::StridedVectorize:
- VecLdCost += TTI.getStridedMemoryOpCost(Instruction::Load, SubVecTy,
- LI0->getPointerOperand(),
- /*VariableMask=*/false,
- CommonAlignment, CostKind) +
+ VecLdCost += TTI.getMemIntrinsicInstrCost(
+ {Intrinsic::experimental_vp_strided_load, SubVecTy,
+ LI0->getPointerOperand(),
+ /*VariableMask=*/false, CommonAlignment},
+ CostKind) +
VectorGEPCost;
break;
case LoadsState::CompressVectorize:
- VecLdCost += TTI.getMaskedMemoryOpCost(
+ VecLdCost += TTI.getMemIntrinsicInstrCost(
{Intrinsic::masked_load, SubVecTy, CommonAlignment,
LI0->getPointerAddressSpace()},
CostKind) +
- VectorGEPCost +
::getShuffleCost(TTI, TTI::SK_PermuteSingleSrc, SubVecTy,
{}, CostKind);
break;
case LoadsState::ScatterVectorize:
- VecLdCost += TTI.getGatherScatterOpCost(Instruction::Load, SubVecTy,
- LI0->getPointerOperand(),
- /*VariableMask=*/false,
- CommonAlignment, CostKind) +
- VectorGEPCost;
+ VecLdCost +=
+ TTI.getMemIntrinsicInstrCost(
+ {Intrinsic::masked_gather, SubVecTy, LI0->getPointerOperand(),
+ /*VariableMask=*/false, CommonAlignment},
+ CostKind) +
+ VectorGEPCost;
break;
case LoadsState::Gather:
// Gathers are already calculated - ignore.
@@ -13328,9 +13330,11 @@ void BoUpSLP::transformNodes() {
BaseLI->getPointerAddressSpace(), CostKind,
TTI::OperandValueInfo()) +
::getShuffleCost(*TTI, TTI::SK_Reverse, VecTy, Mask, CostKind);
- InstructionCost StridedCost = TTI->getStridedMemoryOpCost(
- Instruction::Load, VecTy, BaseLI->getPointerOperand(),
- /*VariableMask=*/false, CommonAlignment, CostKind, BaseLI);
+ InstructionCost StridedCost = TTI->getMemIntrinsicInstrCost(
+ {Intrinsic::experimental_vp_strided_load, VecTy,
+ BaseLI->getPointerOperand(),
+ /*VariableMask=*/false, CommonAlignment, BaseLI},
+ CostKind);
if (StridedCost < OriginalVecCost || ForceStridedLoads) {
// Strided load is more profitable than consecutive load + reverse -
// transform the node to strided load.
@@ -13363,9 +13367,11 @@ void BoUpSLP::transformNodes() {
BaseSI->getPointerAddressSpace(), CostKind,
TTI::OperandValueInfo()) +
::getShuffleCost(*TTI, TTI::SK_Reverse, VecTy, Mask, CostKind);
- InstructionCost StridedCost = TTI->getStridedMemoryOpCost(
- Instruction::Store, VecTy, BaseSI->getPointerOperand(),
- /*VariableMask=*/false, CommonAlignment, CostKind, BaseSI);
+ InstructionCost StridedCost = TTI->getMemIntrinsicInstrCost(
+ {Intrinsic::experimental_vp_strided_store, VecTy,
+ BaseSI->getPointerOperand(),
+ /*VariableMask=*/false, CommonAlignment, BaseSI},
+ CostKind);
if (StridedCost < OriginalVecCost)
// Strided store is more profitable than reverse + consecutive store -
// transform the node to strided store.
@@ -15131,9 +15137,11 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
assert(StridedLoadTy && "Missing StridedPoinerInfo for tree entry.");
Align CommonAlignment =
computeCommonAlignment<LoadInst>(UniqueValues.getArrayRef());
- VecLdCost = TTI->getStridedMemoryOpCost(
- Instruction::Load, StridedLoadTy, LI0->getPointerOperand(),
- /*VariableMask=*/false, CommonAlignment, CostKind);
+ VecLdCost = TTI->getMemIntrinsicInstrCost(
+ {Intrinsic::experimental_vp_strided_load, StridedLoadTy,
+ LI0->getPointerOperand(),
+ /*VariableMask=*/false, CommonAlignment},
+ CostKind);
if (StridedLoadTy != VecTy)
VecLdCost +=
TTI->getCastInstrCost(Instruction::BitCast, VecTy, StridedLoadTy,
@@ -15168,7 +15176,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
Instruction::Load, LoadVecTy, InterleaveFactor, {},
CommonAlignment, LI0->getPointerAddressSpace(), CostKind);
} else if (IsMasked) {
- VecLdCost = TTI->getMaskedMemoryOpCost(
+ VecLdCost = TTI->getMemIntrinsicInstrCost(
{Intrinsic::masked_load, LoadVecTy, CommonAlignment,
LI0->getPointerAddressSpace()},
CostKind);
@@ -15188,9 +15196,10 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
case TreeEntry::ScatterVectorize: {
Align CommonAlignment =
computeCommonAlignment<LoadInst>(UniqueValues.getArrayRef());
- VecLdCost = TTI->getGatherScatterOpCost(
- Instruction::Load, VecTy, LI0->getPointerOperand(),
- /*VariableMask=*/false, CommonAlignment, CostKind);
+ VecLdCost = TTI->getMemIntrinsicInstrCost(
+ {Intrinsic::masked_gather, VecTy, LI0->getPointerOperand(),
+ /*VariableMask=*/false, CommonAlignment},
+ CostKind);
break;
}
case TreeEntry::CombinedVectorize:
@@ -15230,9 +15239,11 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
if (E->State == TreeEntry::StridedVectorize) {
Align CommonAlignment =
computeCommonAlignment<StoreInst>(UniqueValues.getArrayRef());
- VecStCost = TTI->getStridedMemoryOpCost(
- Instruction::Store, VecTy, BaseSI->getPointerOperand(),
- /*VariableMask=*/false, CommonAlignment, CostKind);
+ VecStCost = TTI->getMemIntrinsicInstrCost(
+ {Intrinsic::experimental_vp_strided_store, VecTy,
+ BaseSI->getPointerOperand(),
+ /*VariableMask=*/false, CommonAlignment},
+ CostKind);
} else {
assert(E->State == TreeEntry::Vectorize &&
"Expected either strided or consecutive stores.");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index b27f2f8a3c8cb..7afc6c820a22a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3594,18 +3594,22 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
if (!vputils::isSingleScalar(getAddr()))
PtrTy = toVectorTy(PtrTy, VF);
+ unsigned IID = isa<VPWidenLoadRecipe>(this) ? Intrinsic::masked_gather
+ : isa<VPWidenStoreRecipe>(this) ? Intrinsic::masked_scatter
+ : isa<VPWidenLoadEVLRecipe>(this) ? Intrinsic::vp_gather
+ : Intrinsic::vp_scatter;
return Ctx.TTI.getAddressComputationCost(PtrTy, nullptr, nullptr,
Ctx.CostKind) +
- Ctx.TTI.getGatherScatterOpCost(Opcode, Ty, Ptr, IsMasked, Alignment,
- Ctx.CostKind, &Ingredient);
+ Ctx.TTI.getMemIntrinsicInstrCost(
+ {IID, Ty, Ptr, IsMasked, Alignment, &Ingredient}, Ctx.CostKind);
}
InstructionCost Cost = 0;
if (IsMasked) {
unsigned IID = isa<VPWidenLoadRecipe>(this) ? Intrinsic::masked_load
: Intrinsic::masked_store;
- Cost +=
- Ctx.TTI.getMaskedMemoryOpCost({IID, Ty, Alignment, AS}, Ctx.CostKind);
+ Cost += Ctx.TTI.getMemIntrinsicInstrCost({IID, Ty, Alignment, AS},
+ Ctx.CostKind);
} else {
TTI::OperandValueInfo OpInfo = Ctx.getOperandInfo(
isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(this) ? getOperand(0)
@@ -3715,17 +3719,17 @@ InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
if (!Consecutive || IsMasked)
return VPWidenMemoryRecipe::computeCost(VF, Ctx);
- // We need to use the getMaskedMemoryOpCost() instead of getMemoryOpCost()
+ // We need to use the getMemIntrinsicInstrCost() instead of getMemoryOpCost()
// here because the EVL recipes using EVL to replace the tail mask. But in the
// legacy model, it will always calculate the cost of mask.
- // TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we
+ // TODO: Using getMemoryOpCost() instead of getMemIntrinsicInstrCost when we
// don't need to compare to the legacy cost model.
Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF);
unsigned AS = cast<PointerType>(Ctx.Types.inferScalarType(getAddr()))
->getAddressSpace();
// FIXME: getMaskedMemoryOpCost assumes masked_* intrinsics.
// After migrating to getMemIntrinsicInstrCost, switch this to vp_load.
- InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
+ InstructionCost Cost = Ctx.TTI.getMemIntrinsicInstrCost(
{Intrinsic::masked_load, Ty, Alignment, AS}, Ctx.CostKind);
if (!Reverse)
return Cost;
@@ -3826,17 +3830,17 @@ InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF,
if (!Consecutive || IsMasked)
return VPWidenMemoryRecipe::computeCost(VF, Ctx);
- // We need to use the getMaskedMemoryOpCost() instead of getMemoryOpCost()
+ // We need to use the getMemIntrinsicInstrCost() instead of getMemoryOpCost()
// here because the EVL recipes using EVL to replace the tail mask. But in the
// legacy model, it will always calculate the cost of mask.
- // TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we
+ // TODO: Using getMemoryOpCost() instead of getMemIntrinsicInstrCost when we
// don't need to compare to the legacy cost model.
Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF);
unsigned AS = cast<PointerType>(Ctx.Types.inferScalarType(getAddr()))
->getAddressSpace();
// FIXME: getMaskedMemoryOpCost assumes masked_* intrinsics.
// After migrating to getMemIntrinsicInstrCost, switch this to vp_store.
- InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
+ InstructionCost Cost = Ctx.TTI.getMemIntrinsicInstrCost(
{Intrinsic::masked_store, Ty, Alignment, AS}, Ctx.CostKind);
if (!Reverse)
return Cost;
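To summarize the API surface after patch 1: MemIntrinsicCostAttributes is built in one of two main shapes, and the default getMemIntrinsicInstrCost dispatches on the intrinsic ID back to the existing per-flavor hooks. A rough sketch only, with VecTy and Ptr as placeholders (expand/compress uses a third variant that drops the pointer argument):

    // Masked load/store shape: data type, alignment, address space.
    MemIntrinsicCostAttributes MaskedLd(Intrinsic::masked_load, VecTy,
                                        Align(16), /*AddressSpace=*/0);

    // Pointer-based shape (gather/scatter, strided): data type, pointer
    // operand, variable-mask flag, alignment, optional context instruction.
    MemIntrinsicCostAttributes Gather(Intrinsic::masked_gather, VecTy, Ptr,
                                      /*VariableMask=*/true, Align(4),
                                      /*I=*/nullptr);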
From 228ead71f5610d08325d4ebd80c72024ba68fce9 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Thu, 27 Nov 2025 07:50:10 -0800
Subject: [PATCH 2/3] Use constructor explicitly
---
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 69 +++++++++-------
.../Transforms/Vectorize/LoopVectorize.cpp | 7 +-
.../Transforms/Vectorize/SLPVectorizer.cpp | 79 +++++++++++--------
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 14 ++--
4 files changed, 98 insertions(+), 71 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index afbfd7746679f..71692599cce15 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1625,7 +1625,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
unsigned IID = Opcode == Instruction::Load ? Intrinsic::masked_load
: Intrinsic::masked_store;
Cost = thisT()->getMemIntrinsicInstrCost(
- {IID, VecTy, Alignment, AddressSpace}, CostKind);
+ MemIntrinsicCostAttributes(IID, VecTy, Alignment, AddressSpace),
+ CostKind);
} else
Cost = thisT()->getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace,
CostKind);
@@ -1826,8 +1827,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
Alignment = VPI->getPointerAlignment().valueOrOne();
bool VarMask = isa<Constant>(ICA.getArgs()[2]);
return thisT()->getMemIntrinsicInstrCost(
- {Intrinsic::vp_scatter, ICA.getArgTypes()[0], ICA.getArgs()[1],
- VarMask, Alignment, nullptr},
+ MemIntrinsicCostAttributes(Intrinsic::vp_scatter,
+ ICA.getArgTypes()[0], ICA.getArgs()[1],
+ VarMask, Alignment, nullptr),
CostKind);
}
if (ICA.getID() == Intrinsic::vp_gather) {
@@ -1843,8 +1845,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
Alignment = VPI->getPointerAlignment().valueOrOne();
bool VarMask = isa<Constant>(ICA.getArgs()[1]);
return thisT()->getMemIntrinsicInstrCost(
- {Intrinsic::vp_gather, ICA.getReturnType(), ICA.getArgs()[0],
- VarMask, Alignment, nullptr},
+ MemIntrinsicCostAttributes(Intrinsic::vp_gather,
+ ICA.getReturnType(), ICA.getArgs()[0],
+ VarMask, Alignment, nullptr),
CostKind);
}
@@ -1950,17 +1953,19 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
const Value *Mask = Args[2];
bool VarMask = !isa<Constant>(Mask);
Align Alignment = I->getParamAlign(1).valueOrOne();
- return thisT()->getMemIntrinsicInstrCost({Intrinsic::masked_scatter,
- ICA.getArgTypes()[0], Args[1],
- VarMask, Alignment, I},
- CostKind);
+ return thisT()->getMemIntrinsicInstrCost(
+ MemIntrinsicCostAttributes(Intrinsic::masked_scatter,
+ ICA.getArgTypes()[0], Args[1], VarMask,
+ Alignment, I),
+ CostKind);
}
case Intrinsic::masked_gather: {
const Value *Mask = Args[1];
bool VarMask = !isa<Constant>(Mask);
Align Alignment = I->getParamAlign(0).valueOrOne();
return thisT()->getMemIntrinsicInstrCost(
- {Intrinsic::masked_gather, RetTy, Args[0], VarMask, Alignment, I},
+ MemIntrinsicCostAttributes(Intrinsic::masked_gather, RetTy, Args[0],
+ VarMask, Alignment, I),
CostKind);
}
case Intrinsic::masked_compressstore: {
@@ -1968,17 +1973,18 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
const Value *Mask = Args[2];
Align Alignment = I->getParamAlign(1).valueOrOne();
return thisT()->getMemIntrinsicInstrCost(
- {Intrinsic::masked_compressstore, Data->getType(),
- !isa<Constant>(Mask), Alignment, I},
+ MemIntrinsicCostAttributes(Intrinsic::masked_compressstore,
+ Data->getType(), !isa<Constant>(Mask),
+ Alignment, I),
CostKind);
}
case Intrinsic::masked_expandload: {
const Value *Mask = Args[1];
Align Alignment = I->getParamAlign(0).valueOrOne();
- return thisT()->getMemIntrinsicInstrCost({Intrinsic::masked_expandload,
- RetTy, !isa<Constant>(Mask),
- Alignment, I},
- CostKind);
+ return thisT()->getMemIntrinsicInstrCost(
+ MemIntrinsicCostAttributes(Intrinsic::masked_expandload, RetTy,
+ !isa<Constant>(Mask), Alignment, I),
+ CostKind);
}
case Intrinsic::experimental_vp_strided_store: {
const Value *Data = Args[0];
@@ -1990,7 +1996,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
Align Alignment =
I->getParamAlign(1).value_or(thisT()->DL.getABITypeAlign(EltTy));
return thisT()->getMemIntrinsicInstrCost(
- {IID, Data->getType(), Ptr, VarMask, Alignment, I}, CostKind);
+ MemIntrinsicCostAttributes(IID, Data->getType(), Ptr, VarMask,
+ Alignment, I),
+ CostKind);
}
case Intrinsic::experimental_vp_strided_load: {
const Value *Ptr = Args[0];
@@ -2001,7 +2009,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
Align Alignment =
I->getParamAlign(0).value_or(thisT()->DL.getABITypeAlign(EltTy));
return thisT()->getMemIntrinsicInstrCost(
- {IID, RetTy, Ptr, VarMask, Alignment, I}, CostKind);
+ MemIntrinsicCostAttributes(IID, RetTy, Ptr, VarMask, Alignment, I),
+ CostKind);
}
case Intrinsic::stepvector: {
if (isa<ScalableVectorType>(RetTy))
@@ -2414,28 +2423,32 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
case Intrinsic::masked_store: {
Type *Ty = Tys[0];
Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
- return thisT()->getMemIntrinsicInstrCost({IID, Ty, TyAlign, 0}, CostKind);
+ return thisT()->getMemIntrinsicInstrCost(
+ MemIntrinsicCostAttributes(IID, Ty, TyAlign, 0), CostKind);
}
case Intrinsic::masked_load: {
Type *Ty = RetTy;
Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
- return thisT()->getMemIntrinsicInstrCost({IID, Ty, TyAlign, 0}, CostKind);
+ return thisT()->getMemIntrinsicInstrCost(
+ MemIntrinsicCostAttributes(IID, Ty, TyAlign, 0), CostKind);
}
case Intrinsic::experimental_vp_strided_store: {
auto *Ty = cast<VectorType>(ICA.getArgTypes()[0]);
Align Alignment = thisT()->DL.getABITypeAlign(Ty->getElementType());
- return thisT()->getMemIntrinsicInstrCost({IID, Ty, /*Ptr=*/nullptr,
- /*VariableMask=*/true,
- Alignment, ICA.getInst()},
- CostKind);
+ return thisT()->getMemIntrinsicInstrCost(
+ MemIntrinsicCostAttributes(IID, Ty, /*Ptr=*/nullptr,
+ /*VariableMask=*/true, Alignment,
+ ICA.getInst()),
+ CostKind);
}
case Intrinsic::experimental_vp_strided_load: {
auto *Ty = cast<VectorType>(ICA.getReturnType());
Align Alignment = thisT()->DL.getABITypeAlign(Ty->getElementType());
- return thisT()->getMemIntrinsicInstrCost({IID, Ty, /*Ptr=*/nullptr,
- /*VariableMask=*/true,
- Alignment, ICA.getInst()},
- CostKind);
+ return thisT()->getMemIntrinsicInstrCost(
+ MemIntrinsicCostAttributes(IID, Ty, /*Ptr=*/nullptr,
+ /*VariableMask=*/true, Alignment,
+ ICA.getInst()),
+ CostKind);
}
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_mul:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 26f9ca98291c7..a0c9fb2f91918 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5254,8 +5254,8 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
unsigned IID = I->getOpcode() == Instruction::Load
? Intrinsic::masked_load
: Intrinsic::masked_store;
- Cost +=
- TTI.getMemIntrinsicInstrCost({IID, VectorTy, Alignment, AS}, CostKind);
+ Cost += TTI.getMemIntrinsicInstrCost(
+ MemIntrinsicCostAttributes(IID, VectorTy, Alignment, AS), CostKind);
} else {
TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(I->getOperand(0));
Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
@@ -5319,7 +5319,8 @@ LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
: Intrinsic::masked_scatter;
return TTI.getAddressComputationCost(PtrTy, nullptr, nullptr, CostKind) +
TTI.getMemIntrinsicInstrCost(
- {IID, VectorTy, Ptr, Legal->isMaskRequired(I), Alignment, I},
+ MemIntrinsicCostAttributes(IID, VectorTy, Ptr,
+ Legal->isMaskRequired(I), Alignment, I),
CostKind);
}
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f53d810562352..0eb8ad8d3c93d 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6900,10 +6900,11 @@ static bool isMaskedLoadCompress(
ScalarLoadsCost;
InstructionCost LoadCost = 0;
if (IsMasked) {
- LoadCost = TTI.getMemIntrinsicInstrCost({Intrinsic::masked_load, LoadVecTy,
- CommonAlignment,
- LI->getPointerAddressSpace()},
- CostKind);
+ LoadCost = TTI.getMemIntrinsicInstrCost(
+ MemIntrinsicCostAttributes(Intrinsic::masked_load, LoadVecTy,
+ CommonAlignment,
+ LI->getPointerAddressSpace()),
+ CostKind);
} else {
LoadCost =
TTI.getMemoryOpCost(Instruction::Load, LoadVecTy, CommonAlignment,
@@ -7246,10 +7247,11 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
ScalarGEPCost;
// The cost of masked gather.
InstructionCost MaskedGatherCost =
- TTI.getMemIntrinsicInstrCost({Intrinsic::masked_gather, VecTy,
- cast<LoadInst>(VL0)->getPointerOperand(),
- /*VariableMask=*/false, CommonAlignment},
- CostKind) +
+ TTI.getMemIntrinsicInstrCost(
+ MemIntrinsicCostAttributes(Intrinsic::masked_gather, VecTy,
+ cast<LoadInst>(VL0)->getPointerOperand(),
+ /*VariableMask=*/false, CommonAlignment),
+ CostKind) +
(ProfitableGatherPointers ? 0 : VectorGEPCost);
InstructionCost GatherCost =
getScalarizationOverhead(TTI, ScalarTy, VecTy, DemandedElts,
@@ -7357,27 +7359,30 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
break;
case LoadsState::StridedVectorize:
VecLdCost += TTI.getMemIntrinsicInstrCost(
- {Intrinsic::experimental_vp_strided_load, SubVecTy,
- LI0->getPointerOperand(),
- /*VariableMask=*/false, CommonAlignment},
+ MemIntrinsicCostAttributes(
+ Intrinsic::experimental_vp_strided_load,
+ SubVecTy, LI0->getPointerOperand(),
+ /*VariableMask=*/false, CommonAlignment),
CostKind) +
VectorGEPCost;
break;
case LoadsState::CompressVectorize:
VecLdCost += TTI.getMemIntrinsicInstrCost(
- {Intrinsic::masked_load, SubVecTy, CommonAlignment,
- LI0->getPointerAddressSpace()},
+ MemIntrinsicCostAttributes(
+ Intrinsic::masked_load, SubVecTy,
+ CommonAlignment, LI0->getPointerAddressSpace()),
CostKind) +
::getShuffleCost(TTI, TTI::SK_PermuteSingleSrc, SubVecTy,
{}, CostKind);
break;
case LoadsState::ScatterVectorize:
- VecLdCost +=
- TTI.getMemIntrinsicInstrCost(
- {Intrinsic::masked_gather, SubVecTy, LI0->getPointerOperand(),
- /*VariableMask=*/false, CommonAlignment},
- CostKind) +
- VectorGEPCost;
+ VecLdCost += TTI.getMemIntrinsicInstrCost(
+ MemIntrinsicCostAttributes(
+ Intrinsic::masked_gather, SubVecTy,
+ LI0->getPointerOperand(),
+ /*VariableMask=*/false, CommonAlignment),
+ CostKind) +
+ VectorGEPCost;
break;
case LoadsState::Gather:
// Gathers are already calculated - ignore.
@@ -13331,9 +13336,10 @@ void BoUpSLP::transformNodes() {
TTI::OperandValueInfo()) +
::getShuffleCost(*TTI, TTI::SK_Reverse, VecTy, Mask, CostKind);
InstructionCost StridedCost = TTI->getMemIntrinsicInstrCost(
- {Intrinsic::experimental_vp_strided_load, VecTy,
- BaseLI->getPointerOperand(),
- /*VariableMask=*/false, CommonAlignment, BaseLI},
+ MemIntrinsicCostAttributes(Intrinsic::experimental_vp_strided_load,
+ VecTy, BaseLI->getPointerOperand(),
+ /*VariableMask=*/false, CommonAlignment,
+ BaseLI),
CostKind);
if (StridedCost < OriginalVecCost || ForceStridedLoads) {
// Strided load is more profitable than consecutive load + reverse -
@@ -13368,9 +13374,10 @@ void BoUpSLP::transformNodes() {
TTI::OperandValueInfo()) +
::getShuffleCost(*TTI, TTI::SK_Reverse, VecTy, Mask, CostKind);
InstructionCost StridedCost = TTI->getMemIntrinsicInstrCost(
- {Intrinsic::experimental_vp_strided_store, VecTy,
- BaseSI->getPointerOperand(),
- /*VariableMask=*/false, CommonAlignment, BaseSI},
+ MemIntrinsicCostAttributes(Intrinsic::experimental_vp_strided_store,
+ VecTy, BaseSI->getPointerOperand(),
+ /*VariableMask=*/false, CommonAlignment,
+ BaseSI),
CostKind);
if (StridedCost < OriginalVecCost)
// Strided store is more profitable than reverse + consecutive store -
@@ -15138,9 +15145,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
Align CommonAlignment =
computeCommonAlignment<LoadInst>(UniqueValues.getArrayRef());
VecLdCost = TTI->getMemIntrinsicInstrCost(
- {Intrinsic::experimental_vp_strided_load, StridedLoadTy,
- LI0->getPointerOperand(),
- /*VariableMask=*/false, CommonAlignment},
+ MemIntrinsicCostAttributes(Intrinsic::experimental_vp_strided_load,
+ StridedLoadTy, LI0->getPointerOperand(),
+ /*VariableMask=*/false, CommonAlignment),
CostKind);
if (StridedLoadTy != VecTy)
VecLdCost +=
@@ -15177,8 +15184,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
CommonAlignment, LI0->getPointerAddressSpace(), CostKind);
} else if (IsMasked) {
VecLdCost = TTI->getMemIntrinsicInstrCost(
- {Intrinsic::masked_load, LoadVecTy, CommonAlignment,
- LI0->getPointerAddressSpace()},
+ MemIntrinsicCostAttributes(Intrinsic::masked_load, LoadVecTy,
+ CommonAlignment,
+ LI0->getPointerAddressSpace()),
CostKind);
// TODO: include this cost into CommonCost.
VecLdCost += ::getShuffleCost(*TTI, TTI::SK_PermuteSingleSrc,
@@ -15197,8 +15205,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
Align CommonAlignment =
computeCommonAlignment<LoadInst>(UniqueValues.getArrayRef());
VecLdCost = TTI->getMemIntrinsicInstrCost(
- {Intrinsic::masked_gather, VecTy, LI0->getPointerOperand(),
- /*VariableMask=*/false, CommonAlignment},
+ MemIntrinsicCostAttributes(Intrinsic::masked_gather, VecTy,
+ LI0->getPointerOperand(),
+ /*VariableMask=*/false, CommonAlignment),
CostKind);
break;
}
@@ -15240,9 +15249,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
Align CommonAlignment =
computeCommonAlignment<StoreInst>(UniqueValues.getArrayRef());
VecStCost = TTI->getMemIntrinsicInstrCost(
- {Intrinsic::experimental_vp_strided_store, VecTy,
- BaseSI->getPointerOperand(),
- /*VariableMask=*/false, CommonAlignment},
+ MemIntrinsicCostAttributes(Intrinsic::experimental_vp_strided_store,
+ VecTy, BaseSI->getPointerOperand(),
+ /*VariableMask=*/false, CommonAlignment),
CostKind);
} else {
assert(E->State == TreeEntry::Vectorize &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 7afc6c820a22a..cfdac9031887e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3601,15 +3601,17 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
return Ctx.TTI.getAddressComputationCost(PtrTy, nullptr, nullptr,
Ctx.CostKind) +
Ctx.TTI.getMemIntrinsicInstrCost(
- {IID, Ty, Ptr, IsMasked, Alignment, &Ingredient}, Ctx.CostKind);
+ MemIntrinsicCostAttributes(IID, Ty, Ptr, IsMasked, Alignment,
+ &Ingredient),
+ Ctx.CostKind);
}
InstructionCost Cost = 0;
if (IsMasked) {
unsigned IID = isa<VPWidenLoadRecipe>(this) ? Intrinsic::masked_load
: Intrinsic::masked_store;
- Cost += Ctx.TTI.getMemIntrinsicInstrCost({IID, Ty, Alignment, AS},
- Ctx.CostKind);
+ Cost += Ctx.TTI.getMemIntrinsicInstrCost(
+ MemIntrinsicCostAttributes(IID, Ty, Alignment, AS), Ctx.CostKind);
} else {
TTI::OperandValueInfo OpInfo = Ctx.getOperandInfo(
isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(this) ? getOperand(0)
@@ -3730,7 +3732,8 @@ InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
// FIXME: getMaskedMemoryOpCost assumes masked_* intrinsics.
// After migrating to getMemIntrinsicInstrCost, switch this to vp_load.
InstructionCost Cost = Ctx.TTI.getMemIntrinsicInstrCost(
- {Intrinsic::masked_load, Ty, Alignment, AS}, Ctx.CostKind);
+ MemIntrinsicCostAttributes(Intrinsic::masked_load, Ty, Alignment, AS),
+ Ctx.CostKind);
if (!Reverse)
return Cost;
@@ -3841,7 +3844,8 @@ InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF,
// FIXME: getMaskedMemoryOpCost assumes masked_* intrinsics.
// After migrating to getMemIntrinsicInstrCost, switch this to vp_store.
InstructionCost Cost = Ctx.TTI.getMemIntrinsicInstrCost(
- {Intrinsic::masked_store, Ty, Alignment, AS}, Ctx.CostKind);
+ MemIntrinsicCostAttributes(Intrinsic::masked_store, Ty, Alignment, AS),
+ Ctx.CostKind);
if (!Reverse)
return Cost;
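The change in patch 2 above is mechanical: every braced initializer passed to getMemIntrinsicInstrCost is spelled as an explicit MemIntrinsicCostAttributes(...) construction, which makes the selected constructor overload visible at the call site. Schematically, mirroring the LoopVectorize hunk above (IID, VectorTy, Alignment, AS, CostKind as in that hunk):

    // Patch 1 style:
    Cost +=
        TTI.getMemIntrinsicInstrCost({IID, VectorTy, Alignment, AS}, CostKind);

    // Patch 2 style:
    Cost += TTI.getMemIntrinsicInstrCost(
        MemIntrinsicCostAttributes(IID, VectorTy, Alignment, AS), CostKind);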
From 960333b5e11da455959374ed4ab9c17dbfe6ea29 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Thu, 27 Nov 2025 20:42:28 -0800
Subject: [PATCH 3/3] Fix conflict after merge
---
.../llvm/Analysis/TargetTransformInfo.h | 53 +------------------
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 20 +------
llvm/lib/Analysis/TargetTransformInfo.cpp | 38 -------------
3 files changed, 3 insertions(+), 108 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index e10d7cfec8a58..e24e22da5681b 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -129,12 +129,9 @@ class MemIntrinsicCostAttributes {
/// load/store to transform to the intrinsic.
const Instruction *I = nullptr;
-<<<<<<< HEAD
/// Address in memory.
const Value *Ptr = nullptr;
-=======
->>>>>>> pub/main
/// Vector type of the data to be loaded or stored.
Type *DataTy = nullptr;
@@ -158,15 +155,11 @@ class MemIntrinsicCostAttributes {
const Instruction *I = nullptr)
: I(I), Ptr(Ptr), DataTy(DataTy), IID(Id), VariableMask(VariableMask),
Alignment(Alignment) {}
+
LLVM_ABI MemIntrinsicCostAttributes(Intrinsic::ID Id, Type *DataTy,
Align Alignment, unsigned AddressSpace)
: DataTy(DataTy), IID(Id), AddressSpace(AddressSpace),
Alignment(Alignment) {}
- LLVM_ABI MemIntrinsicCostAttributes(Intrinsic::ID Id, Type *DataTy,
- bool VariableMask, Align Alignment,
- const Instruction *I = nullptr)
- : I(I), DataTy(DataTy), IID(Id), VariableMask(VariableMask),
- Alignment(Alignment) {}
LLVM_ABI MemIntrinsicCostAttributes(Intrinsic::ID Id, Type *DataTy,
bool VariableMask, Align Alignment,
@@ -176,10 +169,7 @@ class MemIntrinsicCostAttributes {
Intrinsic::ID getID() const { return IID; }
const Instruction *getInst() const { return I; }
-<<<<<<< HEAD
const Value *getPointer() const { return Ptr; }
-=======
->>>>>>> pub/main
Type *getDataType() const { return DataTy; }
bool getVariableMask() const { return VariableMask; }
unsigned getAddressSpace() const { return AddressSpace; }
@@ -1629,47 +1619,6 @@ class TargetTransformInfo {
OperandValueInfo OpdInfo = {OK_AnyValue, OP_None},
const Instruction *I = nullptr) const;
-<<<<<<< HEAD
-=======
- /// \return The cost of masked Load and Store instructions.
- LLVM_ABI InstructionCost getMaskedMemoryOpCost(
- const MemIntrinsicCostAttributes &MICA,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
-
- /// \return The cost of Gather or Scatter operation
- /// \p Opcode - is a type of memory access Load or Store
- /// \p DataTy - a vector type of the data to be loaded or stored
- /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
- /// \p VariableMask - true when the memory access is predicated with a mask
- /// that is not a compile-time constant
- /// \p Alignment - alignment of single element
- /// \p I - the optional original context instruction, if one exists, e.g. the
- /// load/store to transform or the call to the gather/scatter intrinsic
- LLVM_ABI InstructionCost getGatherScatterOpCost(
- unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
- Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
- const Instruction *I = nullptr) const;
-
- /// \return The cost of Expand Load or Compress Store operation
- LLVM_ABI InstructionCost getExpandCompressMemoryOpCost(
- const MemIntrinsicCostAttributes &MICA,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
-
- /// \return The cost of strided memory operations.
- /// \p Opcode - is a type of memory access Load or Store
- /// \p DataTy - a vector type of the data to be loaded or stored
- /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
- /// \p VariableMask - true when the memory access is predicated with a mask
- /// that is not a compile-time constant
- /// \p Alignment - alignment of single element
- /// \p I - the optional original context instruction, if one exists, e.g. the
- /// load/store to transform or the call to the gather/scatter intrinsic
- LLVM_ABI InstructionCost getStridedMemoryOpCost(
- unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
- Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
- const Instruction *I = nullptr) const;
-
->>>>>>> pub/main
/// \return The cost of the interleaved memory operation.
/// \p Opcode is the memory operation code
/// \p VecTy is the vector type of the interleaved access.
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 14128ba3d7c7a..b1beb68feca46 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1976,29 +1976,17 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
const Value *Data = Args[0];
const Value *Mask = Args[2];
Align Alignment = I->getParamAlign(1).valueOrOne();
-<<<<<<< HEAD
return thisT()->getMemIntrinsicInstrCost(
- MemIntrinsicCostAttributes(Intrinsic::masked_compressstore,
- Data->getType(), !isa<Constant>(Mask),
-=======
- return thisT()->getExpandCompressMemoryOpCost(
MemIntrinsicCostAttributes(IID, Data->getType(), !isa<Constant>(Mask),
->>>>>>> pub/main
Alignment, I),
CostKind);
}
case Intrinsic::masked_expandload: {
const Value *Mask = Args[1];
Align Alignment = I->getParamAlign(0).valueOrOne();
-<<<<<<< HEAD
return thisT()->getMemIntrinsicInstrCost(
- MemIntrinsicCostAttributes(Intrinsic::masked_expandload, RetTy,
- !isa<Constant>(Mask), Alignment, I),
-=======
- return thisT()->getExpandCompressMemoryOpCost(
MemIntrinsicCostAttributes(IID, RetTy, !isa<Constant>(Mask),
Alignment, I),
->>>>>>> pub/main
CostKind);
}
case Intrinsic::experimental_vp_strided_store: {
@@ -3086,12 +3074,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
case Intrinsic::masked_store:
return thisT()->getMaskedMemoryOpCost(MICA, CostKind);
case Intrinsic::masked_compressstore:
- case Intrinsic::masked_expandload: {
- unsigned Opcode = Id == Intrinsic::masked_expandload ? Instruction::Load
- : Instruction::Store;
- return thisT()->getExpandCompressMemoryOpCost(
- Opcode, DataTy, VariableMask, Alignment, CostKind, I);
- }
+ case Intrinsic::masked_expandload:
+ return thisT()->getExpandCompressMemoryOpCost(MICA, CostKind);
default:
llvm_unreachable("unexpected intrinsic");
}
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 5fe24398583f2..c529d87502acd 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1190,44 +1190,6 @@ InstructionCost TargetTransformInfo::getMemoryOpCost(
return Cost;
}
-<<<<<<< HEAD
-=======
-InstructionCost TargetTransformInfo::getMaskedMemoryOpCost(
- const MemIntrinsicCostAttributes &MICA,
- TTI::TargetCostKind CostKind) const {
- InstructionCost Cost = TTIImpl->getMaskedMemoryOpCost(MICA, CostKind);
- assert(Cost >= 0 && "TTI should not produce negative costs!");
- return Cost;
-}
-
-InstructionCost TargetTransformInfo::getGatherScatterOpCost(
- unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
- Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
- InstructionCost Cost = TTIImpl->getGatherScatterOpCost(
- Opcode, DataTy, Ptr, VariableMask, Alignment, CostKind, I);
- assert((!Cost.isValid() || Cost >= 0) &&
- "TTI should not produce negative costs!");
- return Cost;
-}
-
-InstructionCost TargetTransformInfo::getExpandCompressMemoryOpCost(
- const MemIntrinsicCostAttributes &MICA,
- TTI::TargetCostKind CostKind) const {
- InstructionCost Cost = TTIImpl->getExpandCompressMemoryOpCost(MICA, CostKind);
- assert(Cost >= 0 && "TTI should not produce negative costs!");
- return Cost;
-}
-
-InstructionCost TargetTransformInfo::getStridedMemoryOpCost(
- unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
- Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
- InstructionCost Cost = TTIImpl->getStridedMemoryOpCost(
- Opcode, DataTy, Ptr, VariableMask, Alignment, CostKind, I);
- assert(Cost >= 0 && "TTI should not produce negative costs!");
- return Cost;
-}
-
->>>>>>> pub/main
InstructionCost TargetTransformInfo::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
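After patch 3, the default BasicTTIImplBase::getMemIntrinsicInstrCost still forwards to the pre-existing hooks (getMaskedMemoryOpCost, getGatherScatterOpCost, getStridedMemoryOpCost, getExpandCompressMemoryOpCost), so existing target overrides of those hooks keep being consulted. A hypothetical target wanting to intercept the new entry point directly could do so roughly as below; the class, the gather special case, and the usual BaseT typedef in target TTI implementations are assumptions for illustration, not part of this series:

    InstructionCost
    getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA,
                             TTI::TargetCostKind CostKind) const override {
      // Made-up policy: report variable-mask gathers as unsupported here.
      if (MICA.getID() == Intrinsic::masked_gather && MICA.getVariableMask())
        return InstructionCost::getInvalid();
      // Everything else goes through the shared dispatch from this series.
      return BaseT::getMemIntrinsicInstrCost(MICA, CostKind);
    }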