[llvm] [AArch64] Add costs for ST3 and ST4 instructions, modelled as store(shuffle). (PR #87934)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 7 10:52:03 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-systemz
@llvm/pr-subscribers-backend-aarch64
@llvm/pr-subscribers-backend-x86
Author: David Green (davemgreen)
<details>
<summary>Changes</summary>
This tries to add some costs for the shuffle in an ST3/ST4 instruction, which are represented in LLVM IR as store(interleaving shuffle). In order to detect the store, it needs to add a CxtI context instruction to check the users of the shuffle. ST3 and ST4 are added; ST2 should be a zip1 shuffle, which will be added in another patch.
It should help fix some of the regressions from #87510.
---
Patch is 49.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/87934.diff
22 Files Affected:
- (modified) llvm/include/llvm/Analysis/TargetTransformInfo.h (+13-13)
- (modified) llvm/include/llvm/Analysis/TargetTransformInfoImpl.h (+25-19)
- (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+2-1)
- (modified) llvm/lib/Analysis/TargetTransformInfo.cpp (+3-3)
- (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+21-8)
- (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h (+2-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (+2-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h (+2-1)
- (modified) llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp (+2-1)
- (modified) llvm/lib/Target/ARM/ARMTargetTransformInfo.h (+2-1)
- (modified) llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp (+2-1)
- (modified) llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h (+2-1)
- (modified) llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp (+2-1)
- (modified) llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h (+2-1)
- (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (+2-1)
- (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h (+2-1)
- (modified) llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp (+4-6)
- (modified) llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h (+2-1)
- (modified) llvm/lib/Target/X86/X86TargetTransformInfo.cpp (+4-6)
- (modified) llvm/lib/Target/X86/X86TargetTransformInfo.h (+2-1)
- (modified) llvm/lib/Transforms/Vectorize/VectorCombine.cpp (+3-2)
- (modified) llvm/test/Analysis/CostModel/AArch64/shuffle-store.ll (+21-21)
``````````diff
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index fa9392b86c15b9..58c69ac939763a 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1291,12 +1291,11 @@ class TargetTransformInfo {
/// passed through \p Args, which helps improve the cost estimation in some
/// cases, like in broadcast loads.
/// NOTE: For subvector extractions Tp represents the source type.
- InstructionCost
- getShuffleCost(ShuffleKind Kind, VectorType *Tp,
- ArrayRef<int> Mask = std::nullopt,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
- int Index = 0, VectorType *SubTp = nullptr,
- ArrayRef<const Value *> Args = std::nullopt) const;
+ InstructionCost getShuffleCost(
+ ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask = std::nullopt,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, int Index = 0,
+ VectorType *SubTp = nullptr, ArrayRef<const Value *> Args = std::nullopt,
+ const Instruction *CxtI = nullptr) const;
/// Represents a hint about the context in which a cast is used.
///
@@ -2008,11 +2007,10 @@ class TargetTransformInfo::Concept {
const SmallBitVector &OpcodeMask,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const = 0;
- virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
- ArrayRef<int> Mask,
- TTI::TargetCostKind CostKind,
- int Index, VectorType *SubTp,
- ArrayRef<const Value *> Args) = 0;
+ virtual InstructionCost
+ getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
+ ArrayRef<const Value *> Args, const Instruction *CxtI) = 0;
virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
Type *Src, CastContextHint CCH,
TTI::TargetCostKind CostKind,
@@ -2647,8 +2645,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
- ArrayRef<const Value *> Args) override {
- return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
+ ArrayRef<const Value *> Args,
+ const Instruction *CxtI) override {
+ return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args,
+ CxtI);
}
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
CastContextHint CCH,
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 63c2ef8912b29c..5f17d511d4b835 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -579,10 +579,12 @@ class TargetTransformInfoImplBase {
return InstructionCost::getInvalid();
}
- InstructionCost
- getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
- TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
- ArrayRef<const Value *> Args = std::nullopt) const {
+ InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty,
+ ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind, int Index,
+ VectorType *SubTp,
+ ArrayRef<const Value *> Args = std::nullopt,
+ const Instruction *CxtI = nullptr) const {
return 1;
}
@@ -1341,13 +1343,13 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
if (Shuffle->isExtractSubvectorMask(SubIndex))
return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
Mask, CostKind, SubIndex, VecTy,
- Operands);
+ Operands, Shuffle);
if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
return TargetTTI->getShuffleCost(
TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
- Operands);
+ Operands, Shuffle);
int ReplicationFactor, VF;
if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
@@ -1374,7 +1376,7 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
return TargetTTI->getShuffleCost(
IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, VecTy,
- AdjustMask, CostKind, 0, nullptr);
+ AdjustMask, CostKind, 0, nullptr, Shuffle);
}
// Narrowing shuffle - perform shuffle at original wider width and
@@ -1383,13 +1385,13 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
InstructionCost ShuffleCost = TargetTTI->getShuffleCost(
IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc,
- VecSrcTy, AdjustMask, CostKind, 0, nullptr);
+ VecSrcTy, AdjustMask, CostKind, 0, nullptr, Shuffle);
SmallVector<int, 16> ExtractMask(Mask.size());
std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
- return ShuffleCost + TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector,
- VecSrcTy, ExtractMask,
- CostKind, 0, VecTy);
+ return ShuffleCost + TargetTTI->getShuffleCost(
+ TTI::SK_ExtractSubvector, VecSrcTy,
+ ExtractMask, CostKind, 0, VecTy, Shuffle);
}
if (Shuffle->isIdentity())
@@ -1397,35 +1399,39 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
if (Shuffle->isReverse())
return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, Mask, CostKind,
- 0, nullptr, Operands);
+ 0, nullptr, Operands, Shuffle);
if (Shuffle->isSelect())
return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, Mask, CostKind,
- 0, nullptr, Operands);
+ 0, nullptr, Operands, Shuffle);
if (Shuffle->isTranspose())
return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, Mask,
- CostKind, 0, nullptr, Operands);
+ CostKind, 0, nullptr, Operands,
+ Shuffle);
if (Shuffle->isZeroEltSplat())
return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, Mask,
- CostKind, 0, nullptr, Operands);
+ CostKind, 0, nullptr, Operands,
+ Shuffle);
if (Shuffle->isSingleSource())
return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, Mask,
- CostKind, 0, nullptr, Operands);
+ CostKind, 0, nullptr, Operands,
+ Shuffle);
if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
return TargetTTI->getShuffleCost(
TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
- FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands);
+ FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands,
+ Shuffle);
if (Shuffle->isSplice(SubIndex))
return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, Mask, CostKind,
- SubIndex, nullptr, Operands);
+ SubIndex, nullptr, Operands, Shuffle);
return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, Mask,
- CostKind, 0, nullptr, Operands);
+ CostKind, 0, nullptr, Operands, Shuffle);
}
case Instruction::ExtractElement: {
auto *EEI = dyn_cast<ExtractElementInst>(U);
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 42d8f74fd427fb..f0bc1b7e205bee 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1020,7 +1020,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
- ArrayRef<const Value *> Args = std::nullopt) {
+ ArrayRef<const Value *> Args = std::nullopt,
+ const Instruction *CxtI = nullptr) {
switch (improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp)) {
case TTI::SK_Broadcast:
if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 5f933b4587843c..33c899fe889990 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -916,9 +916,9 @@ InstructionCost TargetTransformInfo::getAltInstrCost(
InstructionCost TargetTransformInfo::getShuffleCost(
ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
- ArrayRef<const Value *> Args) const {
- InstructionCost Cost =
- TTIImpl->getShuffleCost(Kind, Ty, Mask, CostKind, Index, SubTp, Args);
+ ArrayRef<const Value *> Args, const Instruction *CxtI) const {
+ InstructionCost Cost = TTIImpl->getShuffleCost(Kind, Ty, Mask, CostKind,
+ Index, SubTp, Args, CxtI);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index ee7137b92445bb..0fe1847ecf945b 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -3815,18 +3815,30 @@ InstructionCost AArch64TTIImpl::getSpliceCost(VectorType *Tp, int Index) {
return LegalizationCost * LT.first;
}
-InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
- VectorType *Tp,
- ArrayRef<int> Mask,
- TTI::TargetCostKind CostKind,
- int Index, VectorType *SubTp,
- ArrayRef<const Value *> Args) {
+InstructionCost AArch64TTIImpl::getShuffleCost(
+ TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
+ ArrayRef<const Value *> Args, const Instruction *CxtI) {
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
+
// If we have a Mask, and the LT is being legalized somehow, split the Mask
// into smaller vectors and sum the cost of each shuffle.
if (!Mask.empty() && isa<FixedVectorType>(Tp) && LT.second.isVector() &&
Tp->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
+
+ // Check for ST3/ST4 instructions, which are represented in llvm IR as
+ // store(interleaving-shuffle). The shuffle cost could potentially be free,
// but we model it with a cost of LT.first so that ST3/ST4 have a higher
+ // cost than just the store.
+ if ((ShuffleVectorInst::isInterleaveMask(
+ Mask, 4, Tp->getElementCount().getKnownMinValue() * 2) ||
+ ShuffleVectorInst::isInterleaveMask(
+ Mask, 3, Tp->getElementCount().getKnownMinValue() * 2)) &&
+ !ShuffleVectorInst::isZeroEltSplatMask(
+ Mask, Tp->getElementCount().getKnownMinValue()))
+ return LT.first;
+
unsigned TpNumElts = Mask.size();
unsigned LTNumElts = LT.second.getVectorNumElements();
unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
@@ -3874,7 +3886,7 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
if (NumSources <= 2)
Cost += getShuffleCost(NumSources <= 1 ? TTI::SK_PermuteSingleSrc
: TTI::SK_PermuteTwoSrc,
- NTp, NMask, CostKind, 0, nullptr, Args);
+ NTp, NMask, CostKind, 0, nullptr, Args, CxtI);
else if (any_of(enumerate(NMask), [&](const auto &ME) {
return ME.value() % LTNumElts == ME.index();
}))
@@ -4055,7 +4067,8 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// Restore optimal kind.
if (IsExtractSubvector)
Kind = TTI::SK_ExtractSubvector;
- return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp);
+ return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args,
+ CxtI);
}
static bool containsDecreasingPointers(Loop *TheLoop,
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index de39dea2be43e1..dba384481f6a34 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -393,7 +393,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
- ArrayRef<const Value *> Args = std::nullopt);
+ ArrayRef<const Value *> Args = std::nullopt,
+ const Instruction *CxtI = nullptr);
InstructionCost getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 31077dbc0b2cc4..84320d296a037b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -1127,7 +1127,8 @@ InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *VT, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind,
int Index, VectorType *SubTp,
- ArrayRef<const Value *> Args) {
+ ArrayRef<const Value *> Args,
+ const Instruction *CxtI) {
Kind = improveShuffleKindFromMask(Kind, Mask, VT, Index, SubTp);
// Treat extractsubvector as single op permutation.
bool IsExtractSubvector = Kind == TTI::SK_ExtractSubvector;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index cd8e9fd10bbf21..0dab3a98277943 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -234,7 +234,8 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
- ArrayRef<const Value *> Args = std::nullopt);
+ ArrayRef<const Value *> Args = std::nullopt,
+ const Instruction *CxtI = nullptr);
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 3be894ad3bef2c..ee87f7f0e555ef 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1212,7 +1212,8 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *Tp, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind,
int Index, VectorType *SubTp,
- ArrayRef<const Value *> Args) {
+ ArrayRef<const Value *> Args,
+ const Instruction *CxtI) {
Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp);
// Treat extractsubvector as single op permutation.
bool IsExtractSubvector = Kind == TTI::SK_ExtractSubvector;
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index bb4b321b530091..04b32194f806f6 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -220,7 +220,8 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
- ArrayRef<const Value *> Args = std::nullopt);
+ ArrayRef<const Value *> Args = std::nullopt,
+ const Instruction *CxtI = nullptr);
bool preferInLoopReduction(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const;
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index 458b8717256f24..f47fcff5d60259 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -230,7 +230,8 @@ InstructionCost HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind,
int Index, Type *SubTp,
- ArrayRef<const Value *> Args) {
+ ArrayRef<const Value *> Args,
+ const Instruction *CxtI) {
return 1;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index fdb34f308e641e..9689f2f5bb865c 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -122,7 +122,8 @@ class HexagonTTIImpl : public BasicTTIImplBase<HexagonTTIImpl> {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
Type *SubTp,
- ArrayRef<const Value *> Args = std::nullopt);
+ ArrayRef<const Value *> Args = std::nullopt,
+ const Instruction *CxtI = nullptr);
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/87934
More information about the llvm-commits
mailing list