[llvm] 4212ef8 - Revert "[SLP]Further improvement of the cost model for scalars used in buildvectors."

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Mon May 9 13:59:01 PDT 2022


Author: Alexey Bataev
Date: 2022-05-09T13:46:06-07:00
New Revision: 4212ef8a0e5ccdcba41e132501ee6f7dbbf226fb

URL: https://github.com/llvm/llvm-project/commit/4212ef8a0e5ccdcba41e132501ee6f7dbbf226fb
DIFF: https://github.com/llvm/llvm-project/commit/4212ef8a0e5ccdcba41e132501ee6f7dbbf226fb.diff

LOG: Revert "[SLP]Further improvement of the cost model for scalars used in buildvectors."

This reverts commit 99f31acfce338417fea3c14983d6f8fedc8ed043 and several
others to fix detected crashes, reported in https://reviews.llvm.org/D115750

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
    llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
    llvm/test/Transforms/SLPVectorizer/X86/extracts-with-undefs.ll

Removed: 
    llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll


################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8abe0bddbc62..4319af0df046 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6383,122 +6383,6 @@ static bool areTwoInsertFromSameBuildVector(InsertElementInst *VU,
   return false;
 }
 
-/// Checks if the \p IE1 instructions is followed by \p IE2 instruction in the
-/// buildvector sequence.
-static bool isFirstInsertElement(const InsertElementInst *IE1,
-                                 const InsertElementInst *IE2) {
-  const auto *I1 = IE1;
-  const auto *I2 = IE2;
-  do {
-    if (I2 == IE1)
-      return true;
-    if (I1 == IE2)
-      return false;
-    if (I1)
-      I1 = dyn_cast<InsertElementInst>(I1->getOperand(0));
-    if (I2)
-      I2 = dyn_cast<InsertElementInst>(I2->getOperand(0));
-  } while (I1 || I2);
-  llvm_unreachable("Two different buildvectors not expected.");
-}
-
-/// Does the analysis of the provided shuffle masks and performs the requested
-/// actions on the vectors with the given shuffle masks. It tries to do it in
-/// several steps.
-/// 1. If the Base vector is not undef vector, resizing the very first mask to
-/// have common VF and perform action for 2 input vectors (including non-undef
-/// Base). Other shuffle masks are combined with the resulting after the 1 stage
-/// and processed as a shuffle of 2 elements.
-/// 2. If the Base is undef vector and have only 1 shuffle mask, perform the
-/// action only for 1 vector with the given mask, if it is not the identity
-/// mask.
-/// 3. If > 2 masks are used, perform the remaining shuffle actions for 2
-/// vectors, combing the masks properly between the steps.
-template <typename T>
-static T *performExtractsShuffleAction(
-    MutableArrayRef<std::pair<T *, SmallVector<int>>> ShuffleMask, Value *Base,
-    function_ref<unsigned(T *)> GetVF,
-    function_ref<std::pair<T *, bool>(T *, ArrayRef<int>)> ResizeAction,
-    function_ref<T *(ArrayRef<int>, ArrayRef<T *>)> Action) {
-  assert(!ShuffleMask.empty() && "Empty list of shuffles for inserts.");
-  SmallVector<int> Mask(ShuffleMask.begin()->second);
-  auto VMIt = std::next(ShuffleMask.begin());
-  T *Prev = nullptr;
-  bool IsBaseNotUndef = !isUndefVector(Base);
-  if (IsBaseNotUndef) {
-    // Base is not undef, need to combine it with the next subvectors.
-    std::pair<T *, bool> Res = ResizeAction(ShuffleMask.begin()->first, Mask);
-    for (unsigned Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
-      if (Mask[Idx] == UndefMaskElem)
-        Mask[Idx] = Idx;
-      else
-        Mask[Idx] = (Res.second ? Idx : Mask[Idx]) + VF;
-    }
-    Prev = Action(Mask, {nullptr, Res.first});
-  } else if (ShuffleMask.size() == 1) {
-    // Base is undef and only 1 vector is shuffled - perform the action only for
-    // single vector, if the mask is not the identity mask.
-    std::pair<T *, bool> Res = ResizeAction(ShuffleMask.begin()->first, Mask);
-    if (Res.second)
-      // Identity mask is found.
-      Prev = Res.first;
-    else
-      Prev = Action(Mask, {ShuffleMask.begin()->first});
-  } else {
-    // Base is undef and at least 2 input vectors shuffled - perform 2 vectors
-    // shuffles step by step, combining shuffle between the steps.
-    unsigned Vec1VF = GetVF(ShuffleMask.begin()->first);
-    unsigned Vec2VF = GetVF(VMIt->first);
-    if (Vec1VF == Vec2VF) {
-      // No need to resize the input vectors since they are of the same size, we
-      // can shuffle them directly.
-      ArrayRef<int> SecMask = VMIt->second;
-      for (unsigned I = 0, VF = Mask.size(); I < VF; ++I) {
-        if (SecMask[I] != UndefMaskElem) {
-          assert(Mask[I] == UndefMaskElem && "Multiple uses of scalars.");
-          Mask[I] = SecMask[I] + Vec1VF;
-        }
-      }
-      Prev = Action(Mask, {ShuffleMask.begin()->first, VMIt->first});
-    } else {
-      // Vectors of different sizes - resize and reshuffle.
-      std::pair<T *, bool> Res1 =
-          ResizeAction(ShuffleMask.begin()->first, Mask);
-      std::pair<T *, bool> Res2 = ResizeAction(VMIt->first, VMIt->second);
-      ArrayRef<int> SecMask = VMIt->second;
-      for (unsigned I = 0, VF = Mask.size(); I < VF; ++I) {
-        if (Mask[I] != UndefMaskElem) {
-          assert(SecMask[I] == UndefMaskElem && "Multiple uses of scalars.");
-          if (Res1.second)
-            Mask[I] = I;
-        } else if (SecMask[I] != UndefMaskElem) {
-          assert(Mask[I] == UndefMaskElem && "Multiple uses of scalars.");
-          Mask[I] = (Res2.second ? I : SecMask[I]) + VF;
-        }
-      }
-      Prev = Action(Mask, {Res1.first, Res2.first});
-    }
-    VMIt = std::next(VMIt);
-  }
-  // Perform requested actions for the remaining masks/vectors.
-  for (auto E = ShuffleMask.end(); VMIt != E; ++VMIt) {
-    // Shuffle other input vectors, if any.
-    std::pair<T *, bool> Res = ResizeAction(VMIt->first, VMIt->second);
-    ArrayRef<int> SecMask = VMIt->second;
-    for (unsigned I = 0, VF = Mask.size(); I < VF; ++I) {
-      if (SecMask[I] != UndefMaskElem) {
-        assert((Mask[I] == UndefMaskElem || IsBaseNotUndef) &&
-               "Multiple uses of scalars.");
-        Mask[I] = (Res.second ? I : SecMask[I]) + VF;
-      } else if (Mask[I] != UndefMaskElem) {
-        Mask[I] = I;
-      }
-    }
-    Prev = Action(Mask, {Prev, Res.first});
-  }
-  return Prev;
-}
-
 InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
   InstructionCost Cost = 0;
   LLVM_DEBUG(dbgs() << "SLP: Calculating cost for tree of size "
@@ -6519,8 +6403,9 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
 
   SmallPtrSet<Value *, 16> ExtractCostCalculated;
   InstructionCost ExtractCost = 0;
-  SmallVector<MapVector<const TreeEntry *, SmallVector<int>>> ShuffleMasks;
-  SmallVector<std::pair<Value *, const TreeEntry *>> FirstUsers;
+  SmallVector<unsigned> VF;
+  SmallVector<SmallVector<int>> ShuffleMask;
+  SmallVector<Value *> FirstUsers;
   SmallVector<APInt> DemandedElts;
   for (ExternalUser &EU : ExternalUses) {
     // We only add extract cost once for the same scalar.
@@ -6549,19 +6434,14 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
       if (auto *FTy = dyn_cast<FixedVectorType>(VU->getType())) {
         Optional<unsigned> InsertIdx = getInsertIndex(VU);
         if (InsertIdx) {
-          const TreeEntry *ScalarTE = getTreeEntry(EU.Scalar);
-          auto *It =
-              find_if(FirstUsers,
-                      [VU](const std::pair<Value *, const TreeEntry *> &Pair) {
-                        return areTwoInsertFromSameBuildVector(
-                            VU, cast<InsertElementInst>(Pair.first));
-                      });
+          auto *It = find_if(FirstUsers, [VU](Value *V) {
+            return areTwoInsertFromSameBuildVector(VU,
+                                                   cast<InsertElementInst>(V));
+          });
           int VecId = -1;
           if (It == FirstUsers.end()) {
-            (void)ShuffleMasks.emplace_back();
-            SmallVectorImpl<int> &Mask = ShuffleMasks.back()[ScalarTE];
-            if (Mask.empty())
-              Mask.assign(FTy->getNumElements(), UndefMaskElem);
+            VF.push_back(FTy->getNumElements());
+            ShuffleMask.emplace_back(VF.back(), UndefMaskElem);
             // Find the insertvector, vectorized in tree, if any.
             Value *Base = VU;
             while (auto *IEBase = dyn_cast<InsertElementInst>(Base)) {
@@ -6569,41 +6449,22 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
               if (const TreeEntry *E = getTreeEntry(IEBase)) {
                 VU = IEBase;
                 do {
-                  IEBase = cast<InsertElementInst>(Base);
-                  int Idx = *getInsertIndex(IEBase);
-                  assert(Mask[Idx] == UndefMaskElem &&
-                         "InsertElementInstruction used already.");
-                  Mask[Idx] = Idx;
-                  Base = IEBase->getOperand(0);
+                  int Idx = E->findLaneForValue(Base);
+                  ShuffleMask.back()[Idx] = Idx;
+                  Base = cast<InsertElementInst>(Base)->getOperand(0);
                 } while (E == getTreeEntry(Base));
                 break;
               }
               Base = cast<InsertElementInst>(Base)->getOperand(0);
             }
-            FirstUsers.emplace_back(VU, ScalarTE);
-            DemandedElts.push_back(APInt::getZero(FTy->getNumElements()));
+            FirstUsers.push_back(VU);
+            DemandedElts.push_back(APInt::getZero(VF.back()));
             VecId = FirstUsers.size() - 1;
           } else {
-            if (isFirstInsertElement(VU, cast<InsertElementInst>(It->first)))
-              It->first = VU;
             VecId = std::distance(FirstUsers.begin(), It);
           }
           int InIdx = *InsertIdx;
-          SmallVectorImpl<int> &Mask = ShuffleMasks[VecId][ScalarTE];
-          if (Mask.empty())
-            Mask.assign(FTy->getNumElements(), UndefMaskElem);
-          // InsertElement should not be used already or the scalar is part of
-          // TreeEntry, which is operand of the root insertelement instructions.
-          assert((Mask[InIdx] == UndefMaskElem ||
-                  any_of(ScalarTE->UserTreeIndices,
-                         [](const EdgeInfo &EI) {
-                           return EI.EdgeIdx == 1 &&
-                                  EI.UserTE->getOpcode() ==
-                                      Instruction::InsertElement &&
-                                  !EI.UserTE->isAltShuffle();
-                         })) &&
-                 "InsertElementInstruction used already.");
-          Mask[InIdx] = EU.Lane;
+          ShuffleMask[VecId][InIdx] = EU.Lane;
           DemandedElts[VecId].setBit(InIdx);
           continue;
         }
@@ -6630,75 +6491,89 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
 
   InstructionCost SpillCost = getSpillCost();
   Cost += SpillCost + ExtractCost;
-  auto &&ResizeToVF = [this, &Cost](const TreeEntry *TE, ArrayRef<int> Mask) {
-    InstructionCost C = 0;
-    unsigned VF = Mask.size();
-    unsigned VecVF = TE->getVectorFactor();
-    if (VF != VecVF &&
-        (any_of(Mask, [VF](int Idx) { return Idx >= static_cast<int>(VF); }) ||
-         (all_of(Mask,
-                 [VF](int Idx) { return Idx < 2 * static_cast<int>(VF); }) &&
-          !ShuffleVectorInst::isIdentityMask(Mask)))) {
-      SmallVector<int> OrigMask(VecVF, UndefMaskElem);
-      std::copy(Mask.begin(), std::next(Mask.begin(), std::min(VF, VecVF)),
-                OrigMask.begin());
-      C = TTI->getShuffleCost(
+  if (FirstUsers.size() == 1) {
+    int Limit = ShuffleMask.front().size() * 2;
+    if (!all_of(ShuffleMask.front(),
+                [Limit](int Idx) { return Idx < Limit; }) ||
+        !ShuffleVectorInst::isIdentityMask(ShuffleMask.front())) {
+      InstructionCost C = TTI->getShuffleCost(
           TTI::SK_PermuteSingleSrc,
-          FixedVectorType::get(TE->getMainOp()->getType(), VecVF), OrigMask);
-      LLVM_DEBUG(
-          dbgs() << "SLP: Adding cost " << C
-                 << " for final shuffle of insertelement external users.\n";
-          TE->dump(); dbgs() << "SLP: Current total cost = " << Cost << "\n");
+          cast<FixedVectorType>(FirstUsers.front()->getType()),
+          ShuffleMask.front());
+      LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
+                        << " for final shuffle of insertelement external users "
+                        << *VectorizableTree.front()->Scalars.front() << ".\n"
+                        << "SLP: Current total cost = " << Cost << "\n");
       Cost += C;
-      return std::make_pair(TE, true);
     }
-    return std::make_pair(TE, false);
-  };
-  // Calculate the cost of the reshuffled vectors, if any.
-  for (int I = 0, E = FirstUsers.size(); I < E; ++I) {
-    Value *Base = cast<Instruction>(FirstUsers[I].first)->getOperand(0);
-    unsigned VF = ShuffleMasks[I].begin()->second.size();
-    auto *FTy = FixedVectorType::get(
-        cast<VectorType>(FirstUsers[I].first->getType())->getElementType(), VF);
-    auto Vector = ShuffleMasks[I].takeVector();
-    auto &&EstimateShufflesCost = [this, FTy,
-                                   &Cost](ArrayRef<int> Mask,
-                                          ArrayRef<const TreeEntry *> TEs) {
-      assert((TEs.size() == 1 || TEs.size() == 2) &&
-             "Expected exactly 1 or 2 tree entries.");
-      if (TEs.size() == 1) {
-        int Limit = 2 * Mask.size();
-        if (!all_of(Mask, [Limit](int Idx) { return Idx < Limit; }) ||
-            !ShuffleVectorInst::isIdentityMask(Mask)) {
-          InstructionCost C =
-              TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, FTy, Mask);
-          LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
-                            << " for final shuffle of insertelement "
-                               "external users.\n";
-                     TEs.front()->dump();
-                     dbgs() << "SLP: Current total cost = " << Cost << "\n");
-          Cost += C;
-        }
-      } else {
-        InstructionCost C =
-            TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, FTy, Mask);
-        LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
-                          << " for final shuffle of vector node and external "
-                             "insertelement users.\n";
-                   if (TEs.front()) { TEs.front()->dump(); } TEs.back()->dump();
-                   dbgs() << "SLP: Current total cost = " << Cost << "\n");
-        Cost += C;
-      }
-      return TEs.back();
-    };
-    (void)performExtractsShuffleAction<const TreeEntry>(
-        makeMutableArrayRef(Vector.data(), Vector.size()), Base,
-        [](const TreeEntry *E) { return E->getVectorFactor(); }, ResizeToVF,
-        EstimateShufflesCost);
     InstructionCost InsertCost = TTI->getScalarizationOverhead(
-        cast<FixedVectorType>(FirstUsers[I].first->getType()), DemandedElts[I],
-        /*Insert*/ true, /*Extract*/ false);
+        cast<FixedVectorType>(FirstUsers.front()->getType()),
+        DemandedElts.front(), /*Insert*/ true, /*Extract*/ false);
+    LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost
+                      << " for insertelements gather.\n"
+                      << "SLP: Current total cost = " << Cost << "\n");
+    Cost -= InsertCost;
+  } else if (FirstUsers.size() >= 2) {
+    unsigned MaxVF = *std::max_element(VF.begin(), VF.end());
+    // Combined masks of the first 2 vectors.
+    SmallVector<int> CombinedMask(MaxVF, UndefMaskElem);
+    copy(ShuffleMask.front(), CombinedMask.begin());
+    APInt CombinedDemandedElts = DemandedElts.front().zextOrSelf(MaxVF);
+    auto *VecTy = FixedVectorType::get(
+        cast<VectorType>(FirstUsers.front()->getType())->getElementType(),
+        MaxVF);
+    for (int I = 0, E = ShuffleMask[1].size(); I < E; ++I) {
+      if (ShuffleMask[1][I] != UndefMaskElem) {
+        CombinedMask[I] = ShuffleMask[1][I] + MaxVF;
+        CombinedDemandedElts.setBit(I);
+      }
+    }
+    InstructionCost C =
+        TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, CombinedMask);
+    LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
+                      << " for final shuffle of vector node and external "
+                         "insertelement users "
+                      << *VectorizableTree.front()->Scalars.front() << ".\n"
+                      << "SLP: Current total cost = " << Cost << "\n");
+    Cost += C;
+    InstructionCost InsertCost = TTI->getScalarizationOverhead(
+        VecTy, CombinedDemandedElts, /*Insert*/ true, /*Extract*/ false);
+    LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost
+                      << " for insertelements gather.\n"
+                      << "SLP: Current total cost = " << Cost << "\n");
     Cost -= InsertCost;
+    for (int I = 2, E = FirstUsers.size(); I < E; ++I) {
+      if (ShuffleMask[I].empty())
+        continue;
+      // Other elements - permutation of 2 vectors (the initial one and the
+      // next Ith incoming vector).
+      unsigned VF = ShuffleMask[I].size();
+      for (unsigned Idx = 0; Idx < VF; ++Idx) {
+        int Mask = ShuffleMask[I][Idx];
+        if (Mask != UndefMaskElem)
+          CombinedMask[Idx] = MaxVF + Mask;
+        else if (CombinedMask[Idx] != UndefMaskElem)
+          CombinedMask[Idx] = Idx;
+      }
+      for (unsigned Idx = VF; Idx < MaxVF; ++Idx)
+        if (CombinedMask[Idx] != UndefMaskElem)
+          CombinedMask[Idx] = Idx;
+      InstructionCost C =
+          TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, CombinedMask);
+      LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
+                        << " for final shuffle of vector node and external "
+                           "insertelement users "
+                        << *VectorizableTree.front()->Scalars.front() << ".\n"
+                        << "SLP: Current total cost = " << Cost << "\n");
+      Cost += C;
+      InstructionCost InsertCost = TTI->getScalarizationOverhead(
+          cast<FixedVectorType>(FirstUsers[I]->getType()), DemandedElts[I],
+          /*Insert*/ true, /*Extract*/ false);
+      LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost
+                        << " for insertelements gather.\n"
+                        << "SLP: Current total cost = " << Cost << "\n");
+      Cost -= InsertCost;
+    }
   }
 
 #ifndef NDEBUG

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll
deleted file mode 100644
index fa4fd9d75bdb..000000000000
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll
+++ /dev/null
@@ -1,68 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
-
-define void @b() {
-; CHECK-LABEL: @b(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x float> poison, float 0x7FF8000000000000, i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> <float 0xFFF8000000000000, float 0xFFF8000000000000, float undef, float undef>, <4 x i32> <i32 0, i32 4, i32 5, i32 3>
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 0x7FF8000000000000, i32 3
-; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP2]], <4 x float> zeroinitializer, <4 x float> zeroinitializer)
-; CHECK-NEXT:    [[TMP4:%.*]] = fmul <4 x float> [[TMP3]], <float undef, float undef, float undef, float 2.000000e+00>
-; CHECK-NEXT:    [[TMP5:%.*]] = fdiv <4 x float> [[TMP4]], zeroinitializer
-; CHECK-NEXT:    store <4 x float> [[TMP5]], ptr undef, align 4
-; CHECK-NEXT:    ret void
-;
-entry:
-  %mul = fmul float undef, 2.000000e+00
-  %i = tail call float @llvm.fmuladd.f32(float %mul, float 0.000000e+00, float 0.000000e+00)
-  %mul2 = fmul float undef, %i
-  %add = fadd float undef, 1.000000e+00
-  %neg = fneg float %add
-  %i1 = tail call float @llvm.fmuladd.f32(float %neg, float 0.000000e+00, float 0.000000e+00)
-  %mul4 = fmul float undef, %i1
-  %neg7 = fneg float %mul
-  %i2 = tail call float @llvm.fmuladd.f32(float %neg7, float 0.000000e+00, float 0.000000e+00)
-  %mul8 = fmul float undef, %i2
-  %i3 = tail call float @llvm.fmuladd.f32(float %add, float 0.000000e+00, float 0.000000e+00)
-  %mul11 = fmul float %i3, 2.000000e+00
-  %div = fdiv float %mul2, 0.000000e+00
-  store float %div, ptr undef, align 4
-  %div12 = fdiv float %mul4, 0.000000e+00
-  %arrayidx13 = getelementptr inbounds float, ptr undef, i64 1
-  store float %div12, ptr %arrayidx13, align 4
-  %div14 = fdiv float %mul8, 0.000000e+00
-  %arrayidx15 = getelementptr inbounds float, ptr undef, i64 2
-  store float %div14, ptr %arrayidx15, align 4
-  %div16 = fdiv float %mul11, 0.000000e+00
-  %arrayidx17 = getelementptr inbounds float, ptr undef, i64 3
-  store float %div16, ptr %arrayidx17, align 4
-  ret void
-}
-
-declare float @llvm.fmuladd.f32(float, float, float)
-
-define void @test(float %a) {
-; CHECK-LABEL: @test(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x float> poison, float [[A:%.*]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> [[TMP0]], float [[A]], i32 1
-; CHECK-NEXT:    br label [[LOOP:%.*]]
-; CHECK:       loop:
-; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x float> zeroinitializer, [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
-; CHECK-NEXT:    [[AGG:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP3]], i64 1
-; CHECK-NEXT:    br label [[LOOP]]
-;
-entry:
-  br label %loop
-
-loop:
-  %add.i157 = fadd float 0.000000e+00, %a
-  %add23.i = fadd float 0.000000e+00, %a
-  %insert = insertelement <2 x float> zeroinitializer, float %add.i157, i64 0
-  %insert.i = insertelement <2 x float> %insert, float %add23.i, i64 1
-  %agg = insertelement <2 x float> %insert.i, float %add.i157, i64 1
-  br label %loop
-}
-

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
index bbd71825f96c..f529d0fc4733 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
@@ -11,27 +11,25 @@ define void @_foo(double %p1, double %p2, double %p3) #0 {
 ; CHECK-NEXT:    [[TAB2:%.*]] = alloca [256 x i32], align 16
 ; CHECK-NEXT:    br label [[BB1:%.*]]
 ; CHECK:       bb1:
+; CHECK-NEXT:    [[MUL19:%.*]] = fmul double [[P1:%.*]], 1.638400e+04
 ; CHECK-NEXT:    [[MUL20:%.*]] = fmul double [[P3:%.*]], 1.638400e+04
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[MUL20]], 8.192000e+03
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P1:%.*]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P2:%.*]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], <double 1.638400e+04, double 1.638400e+04>
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> <double 0.000000e+00, double poison>, double [[ADD]], i32 1
+; CHECK-NEXT:    [[MUL21:%.*]] = fmul double [[P2:%.*]], 1.638400e+04
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP7:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
-; CHECK-NEXT:    [[VECINIT_I_I237:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
+; CHECK-NEXT:    [[T_0259:%.*]] = phi double [ 0.000000e+00, [[BB1]] ], [ [[ADD27:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[P3_ADDR_0258:%.*]] = phi double [ [[ADD]], [[BB1]] ], [ [[ADD28:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[VECINIT_I_I237:%.*]] = insertelement <2 x double> poison, double [[T_0259]], i32 0
 ; CHECK-NEXT:    [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I237]])
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]]
 ; CHECK-NEXT:    store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
-; CHECK-NEXT:    [[VECINIT_I_I:%.*]] = insertelement <2 x double> poison, double [[TMP6]], i32 0
+; CHECK-NEXT:    [[VECINIT_I_I:%.*]] = insertelement <2 x double> poison, double [[P3_ADDR_0258]], i32 0
 ; CHECK-NEXT:    [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I]])
 ; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]]
 ; CHECK-NEXT:    store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[TMP7]] = fadd <2 x double> [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[ADD27]] = fadd double [[MUL19]], [[T_0259]]
+; CHECK-NEXT:    [[ADD28]] = fadd double [[MUL21]], [[P3_ADDR_0258]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[FOR_BODY]]

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
index cd05b940be22..9265ca1731a0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
@@ -11,27 +11,25 @@ define void @_foo(double %p1, double %p2, double %p3) #0 {
 ; CHECK-NEXT:    [[TAB2:%.*]] = alloca [256 x i32], align 16
 ; CHECK-NEXT:    br label [[BB1:%.*]]
 ; CHECK:       bb1:
+; CHECK-NEXT:    [[MUL19:%.*]] = fmul double [[P1:%.*]], 1.638400e+04
 ; CHECK-NEXT:    [[MUL20:%.*]] = fmul double [[P3:%.*]], 1.638400e+04
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[MUL20]], 8.192000e+03
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P1:%.*]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P2:%.*]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], <double 1.638400e+04, double 1.638400e+04>
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> <double 0.000000e+00, double poison>, double [[ADD]], i32 1
+; CHECK-NEXT:    [[MUL21:%.*]] = fmul double [[P2:%.*]], 1.638400e+04
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP7:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
-; CHECK-NEXT:    [[VECINIT_I_I237:%.*]] = insertelement <2 x double> undef, double [[TMP5]], i32 0
+; CHECK-NEXT:    [[T_0259:%.*]] = phi double [ 0.000000e+00, [[BB1]] ], [ [[ADD27:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[P3_ADDR_0258:%.*]] = phi double [ [[ADD]], [[BB1]] ], [ [[ADD28:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[VECINIT_I_I237:%.*]] = insertelement <2 x double> undef, double [[T_0259]], i32 0
 ; CHECK-NEXT:    [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I237]])
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]]
 ; CHECK-NEXT:    store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
-; CHECK-NEXT:    [[VECINIT_I_I:%.*]] = insertelement <2 x double> undef, double [[TMP6]], i32 0
+; CHECK-NEXT:    [[VECINIT_I_I:%.*]] = insertelement <2 x double> undef, double [[P3_ADDR_0258]], i32 0
 ; CHECK-NEXT:    [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I]])
 ; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]]
 ; CHECK-NEXT:    store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[TMP7]] = fadd <2 x double> [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[ADD27]] = fadd double [[MUL19]], [[T_0259]]
+; CHECK-NEXT:    [[ADD28]] = fadd double [[MUL21]], [[P3_ADDR_0258]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[FOR_BODY]]

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extracts-with-undefs.ll b/llvm/test/Transforms/SLPVectorizer/X86/extracts-with-undefs.ll
index a0254043a91e..1589efe6553c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extracts-with-undefs.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extracts-with-undefs.ll
@@ -6,26 +6,28 @@ define void @test() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[BODY:%.*]]
 ; CHECK:       body:
-; CHECK-NEXT:    [[PHI1:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ 0.000000e+00, [[BODY]] ]
-; CHECK-NEXT:    [[PHI2:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ 0.000000e+00, [[BODY]] ]
-; CHECK-NEXT:    [[MUL_I478_I:%.*]] = fmul fast double [[PHI1]], 0.000000e+00
-; CHECK-NEXT:    [[MUL7_I485_I:%.*]] = fmul fast double undef, 0.000000e+00
-; CHECK-NEXT:    [[ADD8_I_I:%.*]] = fadd fast double [[MUL_I478_I]], [[MUL7_I485_I]]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x double> [ zeroinitializer, [[ENTRY:%.*]] ], [ zeroinitializer, [[BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> <double undef, double poison>, double [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <2 x double> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[TMP3]], i32 1
+; CHECK-NEXT:    [[ADD8_I_I:%.*]] = fadd fast double [[TMP5]], [[TMP4]]
 ; CHECK-NEXT:    [[CMP42_I:%.*]] = fcmp fast ole double [[ADD8_I_I]], 0.000000e+00
 ; CHECK-NEXT:    br i1 false, label [[BODY]], label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    br i1 false, label [[IF_THEN135_I:%.*]], label [[IF_END209_I:%.*]]
 ; CHECK:       if.then135.i:
-; CHECK-NEXT:    [[CMP145_I:%.*]] = fcmp fast olt double [[PHI1]], 0.000000e+00
-; CHECK-NEXT:    [[CMP152_I:%.*]] = fcmp fast olt double [[PHI2]], 0.000000e+00
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i1> <i1 poison, i1 false>, i1 [[CMP152_I]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = select <2 x i1> [[TMP0]], <2 x double> zeroinitializer, <2 x double> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <2 x double> zeroinitializer, [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <2 x double> [[TMP2]], zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = fadd fast <2 x double> [[TMP3]], zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = fcmp fast olt <2 x double> [[TMP0]], zeroinitializer
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i1> <i1 poison, i1 false>, i1 [[TMP7]], i32 0
+; CHECK-NEXT:    [[TMP9:%.*]] = select <2 x i1> [[TMP8]], <2 x double> zeroinitializer, <2 x double> zeroinitializer
+; CHECK-NEXT:    [[TMP10:%.*]] = fmul fast <2 x double> zeroinitializer, [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = fmul fast <2 x double> [[TMP10]], zeroinitializer
+; CHECK-NEXT:    [[TMP12:%.*]] = fadd fast <2 x double> [[TMP11]], zeroinitializer
 ; CHECK-NEXT:    br label [[IF_END209_I]]
 ; CHECK:       if.end209.i:
-; CHECK-NEXT:    [[TMP5:%.*]] = phi <2 x double> [ [[TMP4]], [[IF_THEN135_I]] ], [ zeroinitializer, [[EXIT]] ]
+; CHECK-NEXT:    [[TMP13:%.*]] = phi <2 x double> [ [[TMP12]], [[IF_THEN135_I]] ], [ zeroinitializer, [[EXIT]] ]
 ; CHECK-NEXT:    ret void
 ;
 entry:


        


More information about the llvm-commits mailing list