[llvm] [ARM][SLP] Fix cost function for SLP Vectorization of ZExt/SExt (PR #122713)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 13 06:34:16 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
@llvm/pr-subscribers-llvm-analysis
Author: Nashe Mncube (nasherm)
<details>
<summary>Changes</summary>
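PR #<!-- -->117350 changed the SLP vectorizer in a way that introduced a regression on some ARM benchmarks. The regression arose because those changes assume that vector SExt/ZExt instructions have a constant cost. That assumption holds for RISC-V but not for ARM, where the source and destination types of SExt/ZExt instructions are taken into account when calculating the vector cost. This patch adds `isSExtCostConstant()` and `isZExtCostConstant()` hooks to TargetTransformInfo (returning true by default and false for ARM) and makes the SLP vectorizer skip the cast cost for extended arithmetic reductions only when both hooks return true.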
---
Full diff: https://github.com/llvm/llvm-project/pull/122713.diff
6 Files Affected:
- (modified) llvm/include/llvm/Analysis/TargetTransformInfo.h (+9)
- (modified) llvm/include/llvm/Analysis/TargetTransformInfoImpl.h (+2)
- (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+3)
- (modified) llvm/lib/Analysis/TargetTransformInfo.cpp (+8)
- (modified) llvm/lib/Target/ARM/ARMTargetTransformInfo.h (+5)
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+2-1)
``````````diff
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 752313ab15858c..bac6500ed7184d 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1184,6 +1184,11 @@ class TargetTransformInfo {
/// \return true if vscale is known to be a power of 2
bool isVScaleKnownToBeAPowerOfTwo() const;
+ // \return true if vector implementations assume that SExt and ZExt
+ // instructions have a fixed cost.
+ bool isSExtCostConstant() const;
+ bool isZExtCostConstant() const;
+
/// \return True if the vectorization factor should be chosen to
/// make the vector of the smallest element type match the size of a
/// vector register. For wider element types, this could result in
@@ -2065,6 +2070,8 @@ class TargetTransformInfo::Concept {
virtual std::optional<unsigned> getMaxVScale() const = 0;
virtual std::optional<unsigned> getVScaleForTuning() const = 0;
virtual bool isVScaleKnownToBeAPowerOfTwo() const = 0;
+ virtual bool isSExtCostConstant() const = 0;
+ virtual bool isZExtCostConstant() const = 0;
virtual bool
shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const = 0;
virtual ElementCount getMinimumVF(unsigned ElemWidth,
@@ -2719,6 +2726,8 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
bool isVScaleKnownToBeAPowerOfTwo() const override {
return Impl.isVScaleKnownToBeAPowerOfTwo();
}
+ bool isSExtCostConstant() const override { return Impl.isSExtCostConstant(); }
+ bool isZExtCostConstant() const override { return Impl.isZExtCostConstant(); }
bool shouldMaximizeVectorBandwidth(
TargetTransformInfo::RegisterKind K) const override {
return Impl.shouldMaximizeVectorBandwidth(K);
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 9c74b2a0c31df1..3120e27139d358 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -530,6 +530,8 @@ class TargetTransformInfoImplBase {
std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; }
bool isVScaleKnownToBeAPowerOfTwo() const { return false; }
+ bool isSExtCostConstant() const { return true; }
+ bool isZExtCostConstant() const { return true; }
bool
shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const {
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index c9f142d64ae9e4..5785aa8d74b61c 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -801,6 +801,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; }
bool isVScaleKnownToBeAPowerOfTwo() const { return false; }
+ bool isSExtCostConstant() const { return true; }
+ bool isZExtCostConstant() const { return true; }
+
/// Estimate the overhead of scalarizing an instruction. Insert and Extract
/// are set if the demanded result elements need to be inserted and/or
/// extracted from vectors.
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index b32dffa9f0fe86..63baea591bac45 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1405,6 +1405,14 @@ bool TargetTransformInfo::isProfitableToSinkOperands(
return TTIImpl->isProfitableToSinkOperands(I, OpsToSink);
}
+bool TargetTransformInfo::isSExtCostConstant() const {
+ return TTIImpl->isSExtCostConstant();
+}
+
+bool TargetTransformInfo::isZExtCostConstant() const {
+ return TTIImpl->isZExtCostConstant();
+}
+
bool TargetTransformInfo::isVectorShiftByScalarCheap(Type *Ty) const {
return TTIImpl->isVectorShiftByScalarCheap(Ty);
}
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 3a4f940088b2e3..c2522b53987a3b 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -109,6 +109,11 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
bool enableInterleavedAccessVectorization() { return true; }
+ // Cost model for vector SExt and ZExt takes into account
+ // source and destination vector type for MVE and NEON.
+ bool isSExtCostConstant() const { return false; }
+ bool isZExtCostConstant() const { return false; }
+
TTI::AddressingModeKind
getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const;
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c4582df89213d8..7813c1248f9fcf 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -11440,7 +11440,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
I->getOpcode());
});
if (IsArithmeticExtendedReduction &&
- (VecOpcode == Instruction::ZExt || VecOpcode == Instruction::SExt))
+ (VecOpcode == Instruction::ZExt || VecOpcode == Instruction::SExt) &&
+ (TTI->isZExtCostConstant() && TTI->isSExtCostConstant()))
return CommonCost;
return CommonCost +
TTI->getCastInstrCost(VecOpcode, VecTy, SrcVecTy, CCH, CostKind,
``````````
</details>
https://github.com/llvm/llvm-project/pull/122713
More information about the llvm-commits
mailing list