[llvm] [ARM][SLP] Fix cost function for SLP Vectorization of ZExt/SExt (PR #122713)

Mon Jan 13 06:34:16 PST 2025

llvmbot wrote:




@llvm/pr-subscribers-backend-arm

Author: Nashe Mncube (nasherm)

<details>
<summary>Changes</summary>

PR #117350 made changes to the SLP vectorizer which introduced a regression on some ARM benchmarks. This was due to the changes assuming that SExt/ZExt vector instructions have a constant cost. This behaviour is expected for RISC-V but not for ARM, where the source and destination types of SExt/ZExt instructions are taken into account when calculating the vector cost.


---
Full diff: https://github.com/llvm/llvm-project/pull/122713.diff


6 Files Affected:

- (modified) llvm/include/llvm/Analysis/TargetTransformInfo.h (+9) 
- (modified) llvm/include/llvm/Analysis/TargetTransformInfoImpl.h (+2) 
- (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+3) 
- (modified) llvm/lib/Analysis/TargetTransformInfo.cpp (+8) 
- (modified) llvm/lib/Target/ARM/ARMTargetTransformInfo.h (+5) 
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+2-1) 


``````````diff

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 752313ab15858c..bac6500ed7184d 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1184,6 +1184,11 @@ class TargetTransformInfo {
   /// \return true if vscale is known to be a power of 2
   bool isVScaleKnownToBeAPowerOfTwo() const;
 
+  // \return true if vector implementations assume that SExt and ZExt
+  // instructions have a fixed cost.
+  bool isSExtCostConstant() const;
+  bool isZExtCostConstant() const;
+
   /// \return True if the vectorization factor should be chosen to
   /// make the vector of the smallest element type match the size of a
   /// vector register. For wider element types, this could result in
@@ -2065,6 +2070,8 @@ class TargetTransformInfo::Concept {
   virtual std::optional<unsigned> getMaxVScale() const = 0;
   virtual std::optional<unsigned> getVScaleForTuning() const = 0;
   virtual bool isVScaleKnownToBeAPowerOfTwo() const = 0;
+  virtual bool isSExtCostConstant() const = 0;
+  virtual bool isZExtCostConstant() const = 0;
   virtual bool
   shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const = 0;
   virtual ElementCount getMinimumVF(unsigned ElemWidth,
@@ -2719,6 +2726,8 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
   bool isVScaleKnownToBeAPowerOfTwo() const override {
     return Impl.isVScaleKnownToBeAPowerOfTwo();
   }
+  bool isSExtCostConstant() const override { return Impl.isSExtCostConstant(); }
+  bool isZExtCostConstant() const override { return Impl.isZExtCostConstant(); }
   bool shouldMaximizeVectorBandwidth(
       TargetTransformInfo::RegisterKind K) const override {
     return Impl.shouldMaximizeVectorBandwidth(K);
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 9c74b2a0c31df1..3120e27139d358 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -530,6 +530,8 @@ class TargetTransformInfoImplBase {
   std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
   std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; }
   bool isVScaleKnownToBeAPowerOfTwo() const { return false; }
+  bool isSExtCostConstant() const { return true; }
+  bool isZExtCostConstant() const { return true; }
 
   bool
   shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const {
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index c9f142d64ae9e4..5785aa8d74b61c 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -801,6 +801,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
   std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; }
   bool isVScaleKnownToBeAPowerOfTwo() const { return false; }
 
+  bool isSExtCostConstant() const { return true; }
+  bool isZExtCostConstant() const { return true; }
+
   /// Estimate the overhead of scalarizing an instruction. Insert and Extract
   /// are set if the demanded result elements need to be inserted and/or
   /// extracted from vectors.
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index b32dffa9f0fe86..63baea591bac45 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1405,6 +1405,14 @@ bool TargetTransformInfo::isProfitableToSinkOperands(
   return TTIImpl->isProfitableToSinkOperands(I, OpsToSink);
 }
 
+bool TargetTransformInfo::isSExtCostConstant() const {
+  return TTIImpl->isSExtCostConstant();
+}
+
+bool TargetTransformInfo::isZExtCostConstant() const {
+  return TTIImpl->isZExtCostConstant();
+}
+
 bool TargetTransformInfo::isVectorShiftByScalarCheap(Type *Ty) const {
   return TTIImpl->isVectorShiftByScalarCheap(Ty);
 }
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 3a4f940088b2e3..c2522b53987a3b 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -109,6 +109,11 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
 
   bool enableInterleavedAccessVectorization() { return true; }
 
+  // Cost model for vector SExt and ZExt takes into account
+  // source and destination vector type for MVE and NEON.
+  bool isSExtCostConstant() const { return false; }
+  bool isZExtCostConstant() const { return false; }
+
   TTI::AddressingModeKind
     getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const;
 
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c4582df89213d8..7813c1248f9fcf 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -11440,7 +11440,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
                                 I->getOpcode());
           });
       if (IsArithmeticExtendedReduction &&
-          (VecOpcode == Instruction::ZExt || VecOpcode == Instruction::SExt))
+          (VecOpcode == Instruction::ZExt || VecOpcode == Instruction::SExt) &&
+          (TTI->isZExtCostConstant() && TTI->isSExtCostConstant()))
         return CommonCost;
       return CommonCost +
              TTI->getCastInstrCost(VecOpcode, VecTy, SrcVecTy, CCH, CostKind,

``````````

</details>


https://github.com/llvm/llvm-project/pull/122713