[llvm] 9239932 - [COST] Improve shuffle kind detection if shuffle mask is provided.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 29 09:43:45 PDT 2021


Author: Alexey Bataev
Date: 2021-04-29T09:42:56-07:00
New Revision: 92399322217917e67c0d72a55ec51ddc82251cf6

URL: https://github.com/llvm/llvm-project/commit/92399322217917e67c0d72a55ec51ddc82251cf6
DIFF: https://github.com/llvm/llvm-project/commit/92399322217917e67c0d72a55ec51ddc82251cf6.diff

LOG: [COST] Improve shuffle kind detection if shuffle mask is provided.

Added an extra analysis for better choosing of shuffle kind in
getShuffleCost functions for better cost estimation if mask was
provided.

Differential Revision: https://reviews.llvm.org/D100865

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/BasicTTIImpl.h
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
    llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
    llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
    llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
    llvm/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index c3660afd307f4..f472b11df844a 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -787,11 +787,43 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     return OpCost;
   }
 
+  TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind,
+                                              ArrayRef<int> Mask) const {
+    int Limit = Mask.size() * 2;
+    if (Mask.empty() ||
+        // Extra check required by isSingleSourceMaskImpl function (called by
+        // ShuffleVectorInst::isSingleSourceMask).
+        any_of(Mask, [Limit](int I) { return I >= Limit; }))
+      return Kind;
+    switch (Kind) {
+    case TTI::SK_PermuteSingleSrc:
+      if (ShuffleVectorInst::isReverseMask(Mask))
+        return TTI::SK_Reverse;
+      if (ShuffleVectorInst::isZeroEltSplatMask(Mask))
+        return TTI::SK_Broadcast;
+      break;
+    case TTI::SK_PermuteTwoSrc:
+      if (ShuffleVectorInst::isSelectMask(Mask))
+        return TTI::SK_Select;
+      if (ShuffleVectorInst::isTransposeMask(Mask))
+        return TTI::SK_Transpose;
+      break;
+    case TTI::SK_Select:
+    case TTI::SK_Reverse:
+    case TTI::SK_Broadcast:
+    case TTI::SK_Transpose:
+    case TTI::SK_InsertSubvector:
+    case TTI::SK_ExtractSubvector:
+      break;
+    }
+    return Kind;
+  }
+
   InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                  ArrayRef<int> Mask, int Index,
                                  VectorType *SubTp) {
 
-    switch (Kind) {
+    switch (improveShuffleKindFromMask(Kind, Mask)) {
     case TTI::SK_Broadcast:
       return getBroadcastShuffleOverhead(cast<FixedVectorType>(Tp));
     case TTI::SK_Select:

diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index ead1bf95d25d2..eb188206fafc4 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1620,6 +1620,7 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
                                                VectorType *Tp,
                                                ArrayRef<int> Mask, int Index,
                                                VectorType *SubTp) {
+  Kind = improveShuffleKindFromMask(Kind, Mask);
   if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
       Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc ||
       Kind == TTI::SK_Reverse) {

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index f59979dd7c45a..6e564c0340ae2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -1138,6 +1138,7 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
 InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
                                            VectorType *VT, ArrayRef<int> Mask,
                                            int Index, VectorType *SubTp) {
+  Kind = improveShuffleKindFromMask(Kind, Mask);
   if (ST->hasVOP3PInsts()) {
     if (cast<FixedVectorType>(VT)->getNumElements() == 2 &&
         DL.getTypeSizeInBits(VT->getElementType()) == 16) {

diff  --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index dad237c43e96f..93325f912ffc1 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1139,6 +1139,7 @@ InstructionCost ARMTTIImpl::getMemcpyCost(const Instruction *I) {
 InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
                                            VectorType *Tp, ArrayRef<int> Mask,
                                            int Index, VectorType *SubTp) {
+  Kind = improveShuffleKindFromMask(Kind, Mask);
   if (ST->hasNEON()) {
     if (Kind == TTI::SK_Broadcast) {
       static const CostTblEntry NEONDupTbl[] = {

diff  --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index c08d9a23ae43a..f405a3eb419fd 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -559,6 +559,7 @@ InstructionCost SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
                                                VectorType *Tp,
                                                ArrayRef<int> Mask, int Index,
                                                VectorType *SubTp) {
+  Kind = improveShuffleKindFromMask(Kind, Mask);
   if (ST->hasVector()) {
     unsigned NumVectors = getNumVectorRegs(Tp);
 

diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index fa27dea61fc8a..e6bdb31e16b32 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -968,6 +968,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
   // 64-bit packed integer vectors (v2i32) are widened to type v4i32.
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, BaseTp);
 
+  Kind = improveShuffleKindFromMask(Kind, Mask);
   // Treat Transpose as 2-op shuffles - there's no 
diff erence in lowering.
   if (Kind == TTI::SK_Transpose)
     Kind = TTI::SK_PermuteTwoSrc;

diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5a88b5c223d79..3e69a7d5b4b2a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4352,22 +4352,14 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, SmallVectorImpl<int> &Mask,
     }
     int FoundLane = findLaneForValue(VTE->Scalars, VTE->ReuseShuffleIndices, V);
     Mask[I] = (Entries.front() == VTE ? 0 : VF) + FoundLane;
-    // Extra check required by isSingleSourceMaskImpl function (called by
-    // ShuffleVectorInst::isSingleSourceMask).
-    if (Mask[I] >= 2 * E)
-      return None;
   }
-  if (Entries.size() == 1) {
-    if (ShuffleVectorInst::isReverseMask(Mask))
-      return TargetTransformInfo::SK_Reverse;
+  switch (Entries.size()) {
+  case 1:
     return TargetTransformInfo::SK_PermuteSingleSrc;
-  }
-  if (Entries.size() == 2) {
-    if (ShuffleVectorInst::isSelectMask(Mask))
-      return TargetTransformInfo::SK_Select;
-    if (ShuffleVectorInst::isTransposeMask(Mask))
-      return TargetTransformInfo::SK_Transpose;
+  case 2:
     return TargetTransformInfo::SK_PermuteTwoSrc;
+  default:
+    break;
   }
   return None;
 }

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
index 8376def0afb5b..f1f0e2e9bb0f2 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
@@ -45,7 +45,7 @@ define void @fextr(i16* %ptr) {
 ; YAML-NEXT: Function:        fextr
 ; YAML-NEXT: Args:
 ; YAML-NEXT:   - String:          'Stores SLP vectorized with cost '
-; YAML-NEXT:   - Cost:            '-1'
+; YAML-NEXT:   - Cost:            '-4'
 ; YAML-NEXT:   - String:          ' and with tree size '
 ; YAML-NEXT:   - TreeSize:        '4'
 


        


More information about the llvm-commits mailing list