[llvm] ba74bb3 - [SLP]Fix reused extracts cost.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 2 10:53:56 PST 2021


Author: Alexey Bataev
Date: 2021-12-02T10:52:00-08:00
New Revision: ba74bb3a226e1b4660537f274627285b1bf41ee1

URL: https://github.com/llvm/llvm-project/commit/ba74bb3a226e1b4660537f274627285b1bf41ee1
DIFF: https://github.com/llvm/llvm-project/commit/ba74bb3a226e1b4660537f274627285b1bf41ee1.diff

LOG: [SLP]Fix reused extracts cost.

If an extractelement instruction is used multiple times in different
tree entries (either vectorized or gathered), we need to compensate for
the scalar cost of such instructions. They are completely removed if all
users are part of the tree, but the cost must be compensated only once
for each instruction.

Differential Revision: https://reviews.llvm.org/D114958

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/X86/extractelement-multiple-uses.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 95061e9053fa..335ad6c85387 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4287,8 +4287,8 @@ bool BoUpSLP::canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
 bool BoUpSLP::areAllUsersVectorized(Instruction *I,
                                     ArrayRef<Value *> VectorizedVals) const {
   return (I->hasOneUse() && is_contained(VectorizedVals, I)) ||
-         llvm::all_of(I->users(), [this](User *U) {
-           return ScalarToTreeEntry.count(U) > 0;
+         all_of(I->users(), [this](User *U) {
+           return ScalarToTreeEntry.count(U) > 0 || MustGather.contains(U);
          });
 }
 
@@ -4442,9 +4442,9 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
   // FIXME: it tries to fix a problem with MSVC buildbots.
   TargetTransformInfo &TTIRef = *TTI;
   auto &&AdjustExtractsCost = [this, &TTIRef, CostKind, VL, VecTy,
-                               VectorizedVals](InstructionCost &Cost,
-                                               bool IsGather) {
+                               VectorizedVals, E](InstructionCost &Cost) {
     DenseMap<Value *, int> ExtractVectorsTys;
+    SmallPtrSet<Value *, 4> CheckedExtracts;
     for (auto *V : VL) {
       if (isa<UndefValue>(V))
         continue;
@@ -4452,7 +4452,12 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
       // instruction itself is not going to be vectorized, consider this
       // instruction as dead and remove its cost from the final cost of the
       // vectorized tree.
-      if (!areAllUsersVectorized(cast<Instruction>(V), VectorizedVals))
+      // Also, avoid adjusting the cost for extractelements with multiple uses
+      // in different graph entries.
+      const TreeEntry *VE = getTreeEntry(V);
+      if (!CheckedExtracts.insert(V).second ||
+          !areAllUsersVectorized(cast<Instruction>(V), VectorizedVals) ||
+          (VE && VE != E))
         continue;
       auto *EE = cast<ExtractElementInst>(V);
       Optional<unsigned> EEIdx = getExtractIndex(EE);
@@ -4549,11 +4554,6 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
       }
       return GatherCost;
     }
-    if (isSplat(VL)) {
-      // Found the broadcasting of the single scalar, calculate the cost as the
-      // broadcast.
-      return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy);
-    }
     if ((E->getOpcode() == Instruction::ExtractElement ||
          all_of(E->Scalars,
                 [](Value *V) {
@@ -4571,13 +4571,18 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
         // single input vector or of 2 input vectors.
         InstructionCost Cost =
             computeExtractCost(VL, VecTy, *ShuffleKind, Mask, *TTI);
-        AdjustExtractsCost(Cost, /*IsGather=*/true);
+        AdjustExtractsCost(Cost);
         if (NeedToShuffleReuses)
           Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                                       FinalVecTy, E->ReuseShuffleIndices);
         return Cost;
       }
     }
+    if (isSplat(VL)) {
+      // Found the broadcasting of the single scalar, calculate the cost as the
+      // broadcast.
+      return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy);
+    }
     InstructionCost ReuseShuffleCost = 0;
     if (NeedToShuffleReuses)
       ReuseShuffleCost = TTI->getShuffleCost(
@@ -4755,7 +4760,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
               TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, I);
         }
       } else {
-        AdjustExtractsCost(CommonCost, /*IsGather=*/false);
+        AdjustExtractsCost(CommonCost);
       }
       return CommonCost;
     }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multiple-uses.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multiple-uses.ll
index c47f255f0bfe..31696752bbb3 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multiple-uses.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multiple-uses.ll
@@ -2,24 +2,25 @@
 ; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-unknown-linux -march=core-avx2 -pass-remarks-output=%t | FileCheck %s
 ; RUN: FileCheck %s --input-file=%t --check-prefix=YAML
 
-; YAML: --- !Missed
+; YAML: --- !Passed
 ; YAML: Pass:            slp-vectorizer
-; YAML: Name:            NotBeneficial
+; YAML: Name:            VectorizedList
 ; YAML: Function:        multi_uses
 ; YAML: Args:
-; YAML:  - String:          'List vectorization was possible but not beneficial with cost '
-; YAML:  - Cost:            '0'
-; YAML:  - String:          ' >= '
-; YAML:  - Treshold:        '0'
+; YAML:  - String:          'SLP vectorized with cost '
+; YAML:  - Cost:            '-1'
+; YAML:  - String:          ' and with tree size '
+; YAML:  - TreeSize:        '3'
 
 define float @multi_uses(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @multi_uses(
-; CHECK-NEXT:    [[X0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
-; CHECK-NEXT:    [[X1:%.*]] = extractelement <2 x float> [[X]], i32 1
 ; CHECK-NEXT:    [[Y1:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 1
-; CHECK-NEXT:    [[X0X0:%.*]] = fmul float [[X0]], [[Y1]]
-; CHECK-NEXT:    [[X1X1:%.*]] = fmul float [[X1]], [[Y1]]
-; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[X0X0]], [[X1X1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> poison, float [[Y1]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[Y1]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul <2 x float> [[X:%.*]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
+; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP4]], [[TMP5]]
 ; CHECK-NEXT:    ret float [[ADD]]
 ;
   %x0 = extractelement <2 x float> %x, i32 0


        


More information about the llvm-commits mailing list