[llvm] c27a0b2 - [SLP][RISCV] Account for offset folding in getPointersChainCost

Luke Lau via llvm-commits llvm-commits at lists.llvm.org
Mon May 22 05:55:36 PDT 2023


Author: Luke Lau
Date: 2023-05-22T13:55:30+01:00
New Revision: c27a0b21c5782d61c3c3125571239d08085565da

URL: https://github.com/llvm/llvm-project/commit/c27a0b21c5782d61c3c3125571239d08085565da
DIFF: https://github.com/llvm/llvm-project/commit/c27a0b21c5782d61c3c3125571239d08085565da.diff

LOG: [SLP][RISCV] Account for offset folding in getPointersChainCost

For a GEP in a pointer chain, if:
1) the pointer chain is unit-strided
2) the base pointer wasn't folded and is sitting in a register somewhere
3) the distance between the GEP and the base pointer is small enough and
   can be folded into the addressing mode of the using load/store

Then we can exclude that GEP from the total cost of the pointer chain,
as it will likely be folded away.
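
As a concrete sketch (in the spirit of the @h test updated below; the
RISC-V assembly is illustrative, not taken from actual codegen), consider
a unit-strided pair of stores:

  %p1 = getelementptr [4 x i32], ptr %dest, i32 %i, i32 0
  store i32 1, ptr %p1
  %p2 = getelementptr [4 x i32], ptr %dest, i32 %i, i32 1
  store i32 1, ptr %p2

%p2 doesn't have all-constant indices, but it sits at a fixed +4 bytes
from %p1. Assuming %p1 is already materialized in a register for its own
store, the second store can reuse that register with an immediate offset
(e.g. sw t0, 4(a2)), so %p2 shouldn't be charged in the chain's cost.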

In order to check whether 3) holds, we need to know the type of memory
access being made by the users of the pointer chain. For that, a new
AccessTy argument is passed along to getPointersChainCost. (Using the
source pointer type of the GEP isn't accurate; see
https://reviews.llvm.org/D149889 for more details.)
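
For instance (a hypothetical example, not from this patch's tests), the
GEP's element type says nothing about the width of the access using it:

  %p = getelementptr i8, ptr %base, i64 8
  %v = load i64, ptr %p

The stride that matters here is the 8 bytes of the i64 load, not the i8
the GEP is written in terms of, so the caller has to supply the access
type (the SLP vectorizer passes ScalarTy for the scalar chain and VecTy
for the vectorized one, as in the diff below).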

Also note that 2) is currently an assumption, and could be modelled more
accurately.

This prevents some unprofitable cases from being SLP vectorized on
RISC-V by making the scalar costs cheaper and closer to the actual
codegen.
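
One such case is the splat_store_v2i32 test in struct-gep.ll below (IR
reconstructed from its updated CHECK lines):

  %p1 = getelementptr %struct.2i32, ptr %dest, i64 %i, i32 0
  store i32 1, ptr %p1
  %p2 = getelementptr %struct.2i32, ptr %dest, i64 %i, i32 1
  store i32 1, ptr %p2

With the cheaper, more realistic scalar cost, SLP now keeps the two
scalar stores instead of forming a <2 x i32> store whose address would
need an extra computation (vle/vse take no immediate offset).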

For now the getPointersChainCost hook is duplicated for RISC-V to avoid
disturbing other targets, but it could be merged back in and shared with
other targets in a follow-up patch.

Reviewed By: ABataev

Differential Revision: https://reviews.llvm.org/D149654

Added: 
    

Modified: 
    llvm/include/llvm/Analysis/TargetTransformInfo.h
    llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
    llvm/lib/Analysis/TargetTransformInfo.cpp
    llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
    llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
    llvm/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/lib/Target/X86/X86TargetTransformInfo.h
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll
    llvm/test/Transforms/SLPVectorizer/RISCV/struct-gep.ll

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 4fc56fcc4687..df3b24ae1a8c 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -320,9 +320,11 @@ class TargetTransformInfo {
 
   /// Estimate the cost of a chain of pointers (typically pointer operands of a
   /// chain of loads or stores within the same block) operations set when lowered.
+  /// \p AccessTy is the type of the loads/stores that will ultimately use the
+  /// \p Ptrs.
   InstructionCost
   getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
-                       const PointersChainInfo &Info,
+                       const PointersChainInfo &Info, Type *AccessTy,
                        TargetCostKind CostKind = TTI::TCK_RecipThroughput
 
   ) const;
@@ -1663,7 +1665,7 @@ class TargetTransformInfo::Concept {
                                      TTI::TargetCostKind CostKind) = 0;
   virtual InstructionCost
   getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
-                       const TTI::PointersChainInfo &Info,
+                       const TTI::PointersChainInfo &Info, Type *AccessTy,
                        TTI::TargetCostKind CostKind) = 0;
   virtual unsigned getInliningThresholdMultiplier() = 0;
   virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
@@ -2024,8 +2026,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
   InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                        const Value *Base,
                                        const PointersChainInfo &Info,
+                                       Type *AccessTy,
                                        TargetCostKind CostKind) override {
-    return Impl.getPointersChainCost(Ptrs, Base, Info, CostKind);
+    return Impl.getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
   }
   unsigned getInliningThresholdMultiplier() override {
     return Impl.getInliningThresholdMultiplier();

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 131e1e3f4b91..d7b1538d640e 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1041,6 +1041,7 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
   InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                        const Value *Base,
                                        const TTI::PointersChainInfo &Info,
+                                       Type *AccessTy,
                                        TTI::TargetCostKind CostKind) {
     InstructionCost Cost = TTI::TCC_Free;
     // In the basic model we take into account GEP instructions only

diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index f7da4617b449..a7b89b2df452 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -230,10 +230,11 @@ TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
 
 InstructionCost TargetTransformInfo::getPointersChainCost(
     ArrayRef<const Value *> Ptrs, const Value *Base,
-    const TTI::PointersChainInfo &Info, TTI::TargetCostKind CostKind) const {
+    const TTI::PointersChainInfo &Info, Type *AccessTy,
+    TTI::TargetCostKind CostKind) const {
   assert((Base || !Info.isSameBase()) &&
          "If pointers have same base address it has to be provided.");
-  return TTIImpl->getPointersChainCost(Ptrs, Base, Info, CostKind);
+  return TTIImpl->getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
 }
 
 unsigned TargetTransformInfo::getEstimatedNumberOfCaseClusters(

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 528c2072be9d..880d7adb120f 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1592,6 +1592,55 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost(
   }
 }
 
+// TODO: Deduplicate from TargetTransformInfoImplCRTPBase.
+InstructionCost RISCVTTIImpl::getPointersChainCost(
+    ArrayRef<const Value *> Ptrs, const Value *Base,
+    const TTI::PointersChainInfo &Info, Type *AccessTy,
+    TTI::TargetCostKind CostKind) {
+  InstructionCost Cost = TTI::TCC_Free;
+  // In the basic model we take into account GEP instructions only
+  // (although alloca instructions, values, constants and/or constant
+  // expressions, PHIs, bitcasts ... whatever is allowed to be used as a
+  // pointer may also appear here). Typically, if Base is not a GEP
+  // instruction and all the pointers are relative to the same base address,
+  // the rest are either GEP instructions, PHIs, bitcasts or constants. When
+  // we have the same base, we just calculate the cost of each non-Base GEP
+  // as an ADD operation if any of its indices is non-constant.
+  // If there are no known dependencies between the pointers, the cost is
+  // calculated as the sum of the costs of the GEP instructions.
+  for (auto [I, V] : enumerate(Ptrs)) {
+    const auto *GEP = dyn_cast<GetElementPtrInst>(V);
+    if (!GEP)
+      continue;
+    if (Info.isSameBase() && V != Base) {
+      if (GEP->hasAllConstantIndices())
+        continue;
+      // If the chain is unit-stride and BaseReg + stride*i is a legal
+      // addressing mode, then presume the base GEP is sitting around in a
+      // register somewhere and check if we can fold the offset relative to
+      // it.
+      unsigned Stride = DL.getTypeStoreSize(AccessTy);
+      if (Info.isUnitStride() &&
+          isLegalAddressingMode(AccessTy,
+                                /* BaseGV */ nullptr,
+                                /* BaseOffset */ Stride * I,
+                                /* HasBaseReg */ true,
+                                /* Scale */ 0,
+                                GEP->getType()->getPointerAddressSpace()))
+        continue;
+      Cost += getArithmeticInstrCost(Instruction::Add, GEP->getType(), CostKind,
+                                     {TTI::OK_AnyValue, TTI::OP_None},
+                                     {TTI::OK_AnyValue, TTI::OP_None},
+                                     std::nullopt);
+    } else {
+      SmallVector<const Value *> Indices(GEP->indices());
+      Cost += getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
+                         Indices, CostKind);
+    }
+  }
+  return Cost;
+}
+
 void RISCVTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                            TTI::UnrollingPreferences &UP,
                                            OptimizationRemarkEmitter *ORE) {

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 5319bff98f70..e96f93cd98fa 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -106,6 +106,12 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
                                         Align Alignment, unsigned AddressSpace,
                                         TTI::TargetCostKind CostKind);
 
+  InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
+                                       const Value *Base,
+                                       const TTI::PointersChainInfo &Info,
+                                       Type *AccessTy,
+                                       TTI::TargetCostKind CostKind);
+
   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                TTI::UnrollingPreferences &UP,
                                OptimizationRemarkEmitter *ORE);

diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index e2de1db698d6..be9f26457642 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -4943,9 +4943,11 @@ X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, Align Alignment,
   return Cost + LT.first;
 }
 
-InstructionCost X86TTIImpl::getPointersChainCost(
-    ArrayRef<const Value *> Ptrs, const Value *Base,
-    const TTI::PointersChainInfo &Info, TTI::TargetCostKind CostKind) {
+InstructionCost
+X86TTIImpl::getPointersChainCost(ArrayRef<const Value *> Ptrs,
+                                 const Value *Base,
+                                 const TTI::PointersChainInfo &Info,
+                                 Type *AccessTy, TTI::TargetCostKind CostKind) {
   if (Info.isSameBase() && Info.isKnownStride()) {
     // If all the pointers have known stride all the differences are translated
     // into constants. X86 memory addressing allows encoding it into
@@ -4957,7 +4959,7 @@ InstructionCost X86TTIImpl::getPointersChainCost(
     }
     return TTI::TCC_Free;
   }
-  return BaseT::getPointersChainCost(Ptrs, Base, Info, CostKind);
+  return BaseT::getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
 }
 
 InstructionCost X86TTIImpl::getAddressComputationCost(Type *Ty,

diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index f182aa7b7bbf..2c5105d2f03f 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -180,6 +180,7 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
   InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                        const Value *Base,
                                        const TTI::PointersChainInfo &Info,
+                                       Type *AccessTy,
                                        TTI::TargetCostKind CostKind);
   InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
                                             const SCEV *Ptr);

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index a1079d44114d..75780ca2f0e7 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7394,7 +7394,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
       // stay in vectorized code due to uses outside of these scalar
       // loads/stores.
       ScalarCost = TTI->getPointersChainCost(
-          Ptrs, BasePtr, TTI::PointersChainInfo::getUnitStride(), CostKind);
+          Ptrs, BasePtr, TTI::PointersChainInfo::getUnitStride(), ScalarTy,
+          CostKind);
 
       SmallVector<const Value *> PtrsRetainedInVecCode;
       for (Value *V : Ptrs) {
@@ -7420,7 +7421,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
       }
       VecCost = TTI->getPointersChainCost(
           PtrsRetainedInVecCode, BasePtr,
-          TTI::PointersChainInfo::getKnownStride(), CostKind);
+          TTI::PointersChainInfo::getKnownStride(), VecTy, CostKind);
     } else {
       // Case 1: Ptrs are the arguments of loads that we are going to transform
       // into masked gather load intrinsic.
@@ -7436,7 +7437,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
               ? TTI::PointersChainInfo::getUnknownStride()
               : TTI::PointersChainInfo::getKnownStride();
 
-      ScalarCost = TTI->getPointersChainCost(Ptrs, BasePtr, PtrsInfo, CostKind);
+      ScalarCost = TTI->getPointersChainCost(Ptrs, BasePtr, PtrsInfo, ScalarTy,
+                                             CostKind);
 
       // Remark: it's not quite correct to use scalar GEP cost for a vector GEP,
       // but it's not clear how to do that without having vector GEP arguments

diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll
index 6a217609ab77..3872cb0c3a34 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll
@@ -69,7 +69,7 @@ entry:
   ret void
 }
 
-; FIXME: When computing the scalar pointers chain cost here, there is a cost of
+; When computing the scalar pointers chain cost here, there is a cost of
 ; 1 for the base pointer, and the rest can be folded in, so the scalar cost
 ; should be 1.
 define void @h(ptr %dest, i32 %i) {
@@ -86,7 +86,7 @@ entry:
 ; YAML-NEXT: Function:        h
 ; YAML-NEXT: Args:
 ; YAML-NEXT:   - String:          'Stores SLP vectorized with cost '
-; YAML-NEXT:   - Cost:            '-5'
+; YAML-NEXT:   - Cost:            '-2'
 ; YAML-NEXT:   - String:          ' and with tree size '
 ; YAML-NEXT:   - TreeSize:        '2'
   %p1 = getelementptr [4 x i32], ptr %dest, i32 %i, i32 0

diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/struct-gep.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/struct-gep.ll
index 1f6a28ea4804..8f12501f6977 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/struct-gep.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/struct-gep.ll
@@ -2,7 +2,9 @@
 ; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 -mattr=+v \
 ; RUN: -riscv-v-slp-max-vf=0 -S | FileCheck %s
 
-; FIXME: This should not be vectorized
+; This shouldn't be vectorized as the extra address computation required for the
+; vector store makes it unprofitable (vle/vse don't have an offset in their
+; addressing modes)
 
 %struct.2i32 = type { i32, i32 }
 
@@ -10,7 +12,9 @@ define void @splat_store_v2i32(ptr %dest, i64 %i) {
 ; CHECK-LABEL: @splat_store_v2i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[P1:%.*]] = getelementptr [[STRUCT_2I32:%.*]], ptr [[DEST:%.*]], i64 [[I:%.*]], i32 0
-; CHECK-NEXT:    store <2 x i32> <i32 1, i32 1>, ptr [[P1]], align 4
+; CHECK-NEXT:    store i32 1, ptr [[P1]], align 4
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr [[STRUCT_2I32]], ptr [[DEST]], i64 [[I]], i32 1
+; CHECK-NEXT:    store i32 1, ptr [[P2]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:


        

