[llvm] [SLP] Create groups before sorting Phis (PR #111174)

Nabeel Omer via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 4 08:42:46 PDT 2024


https://github.com/omern1 updated https://github.com/llvm/llvm-project/pull/111174

>From 36ac0d8bb14b3a525062785b67b22e4e3870ff59 Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Fri, 4 Oct 2024 16:29:01 +0100
Subject: [PATCH 1/2] [SLP] Create groups before sorting Phis

This fixes the comparator `PHICompare` by creating sorted groups of comparable instructions to avoid transitive weirdness in std::sort.

This was discussed in https://discourse.llvm.org/t/slp-vectorizer-phi-sorting-produces-different-results-between-platforms/81467/18.
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 156 ++++++++++++++----
 .../SLPVectorizer/AArch64/landing_pad.ll      |  21 +--
 .../AArch64/reused-scalar-repeated-in-node.ll |  54 +++---
 .../buildvector-postpone-for-dependency.ll    |   4 +-
 .../SLPVectorizer/X86/crash_clear_undefs.ll   |   2 +-
 .../X86/crash_scheduling-inseltpoison.ll      |  12 +-
 .../SLPVectorizer/X86/crash_scheduling.ll     |  12 +-
 .../SLPVectorizer/X86/crash_sim4b1.ll         |  18 +-
 .../X86/extractelement-phi-in-landingpad.ll   |   2 +-
 ...gathered-delayed-nodes-with-reused-user.ll |   4 +-
 .../SLPVectorizer/X86/geps-non-pow-2.ll       |   6 +-
 .../SLPVectorizer/X86/landing_pad.ll          |   7 +-
 .../X86/matching-gather-nodes-phi-users.ll    |  13 +-
 .../X86/phi-nodes-as-operand-reorder.ll       |   2 +-
 .../reduced-value-replace-extractelement.ll   |   4 +-
 .../SLPVectorizer/X86/reused-undefs.ll        |   6 +-
 .../X86/value-bug-inseltpoison.ll             |   2 +-
 .../Transforms/SLPVectorizer/X86/value-bug.ll |   2 +-
 .../SLPVectorizer/extracts-with-undefs.ll     |  78 ++++++---
 .../SLPVectorizer/phi-undef-input.ll          |  16 +-
 .../SLPVectorizer/postponed_gathers.ll        |   4 +-
 21 files changed, 278 insertions(+), 147 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index dee0b7e1f43714..1cfdf24ede765a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5441,40 +5441,140 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
     if (!TE.ReorderIndices.empty())
       return TE.ReorderIndices;
 
-    auto PHICompare = [&](unsigned I1, unsigned I2) {
-      Value *V1 = TE.Scalars[I1];
-      Value *V2 = TE.Scalars[I2];
-      if (V1 == V2 || (V1->getNumUses() == 0 && V2->getNumUses() == 0))
-        return false;
-      if (V1->getNumUses() < V2->getNumUses())
-        return true;
-      if (V1->getNumUses() > V2->getNumUses())
-        return false;
-      auto *FirstUserOfPhi1 = cast<Instruction>(*V1->user_begin());
-      auto *FirstUserOfPhi2 = cast<Instruction>(*V2->user_begin());
-      if (auto *IE1 = dyn_cast<InsertElementInst>(FirstUserOfPhi1))
-        if (auto *IE2 = dyn_cast<InsertElementInst>(FirstUserOfPhi2)) {
-          if (!areTwoInsertFromSameBuildVector(
-                  IE1, IE2,
-                  [](InsertElementInst *II) { return II->getOperand(0); }))
-            return I1 < I2;
-          return getElementIndex(IE1) < getElementIndex(IE2);
-        }
-      if (auto *EE1 = dyn_cast<ExtractElementInst>(FirstUserOfPhi1))
-        if (auto *EE2 = dyn_cast<ExtractElementInst>(FirstUserOfPhi2)) {
-          if (EE1->getOperand(0) != EE2->getOperand(0))
-            return I1 < I2;
-          return getElementIndex(EE1) < getElementIndex(EE2);
-        }
-      return I1 < I2;
-    };
     DenseMap<unsigned, unsigned> PhiToId;
     SmallVector<unsigned> Phis(TE.Scalars.size());
     std::iota(Phis.begin(), Phis.end(), 0);
+
+    BitVector Seen(Phis.size());
+    SmallVector<SmallVector<unsigned>> Groups;
+    Groups.resize(Phis.size(), {});
+
+    for (auto const Phidx : Phis) {
+      // We've already found a group for this Phidx
+      if (Seen.test(Phidx))
+        continue;
+
+      Groups[Phidx].push_back(Phidx);
+
+      auto UserIterPhi1 = TE.Scalars[Phidx]->user_begin();
+      if (UserIterPhi1.atEnd())
+        continue;
+
+      auto *FirstUserOfPhi1 = cast<Instruction>(*UserIterPhi1);
+
+      unsigned Count = 0;
+      if (auto *IE1 = dyn_cast<InsertElementInst>(FirstUserOfPhi1)) {
+        auto Width = IE1->getType()->getElementCount();
+        assert(!Width.isScalable());
+        Count = Width.getFixedValue() - 1;
+      } else if (auto *EE1 = dyn_cast<ExtractElementInst>(FirstUserOfPhi1)) {
+        // Count is unused in the case of extract element instructions.
+        Count = -1;
+      }
+
+      for (auto const PhidxB : ArrayRef(Phis).drop_front(Phidx + 1)) {
+
+        // At this point we know that we have found all the elements that fit in
+        // this group so we'll stop.
+        if (Count == 0)
+          break;
+
+        // B is already in a group so we don't want to look at it again.
+        if (Seen.test(PhidxB))
+          continue;
+
+        auto UserIterPhi2 = TE.Scalars[PhidxB]->user_begin();
+        if (UserIterPhi2.atEnd()) {
+          Seen.set(PhidxB);
+          continue;
+        }
+
+        auto *FirstUserOfPhi2 = cast<Instruction>(*UserIterPhi2);
+
+        if (auto *IE2 = dyn_cast<InsertElementInst>(FirstUserOfPhi2)) {
+          if (auto *IE1 = dyn_cast<InsertElementInst>(FirstUserOfPhi1)) {
+            if (areTwoInsertFromSameBuildVector(
+                    IE1, IE2,
+                    [](InsertElementInst *II) { return II->getOperand(0); })) {
+              Groups[Phidx].push_back(PhidxB);
+              Seen.set(PhidxB);
+              --Count;
+              continue;
+            }
+          }
+        } else if (auto *EE1 = dyn_cast<ExtractElementInst>(FirstUserOfPhi1)) {
+          if (auto *EE2 = dyn_cast<ExtractElementInst>(FirstUserOfPhi2)) {
+            if (EE1->getVectorOperand() == EE2->getVectorOperand()) {
+              Groups[Phidx].push_back(PhidxB);
+              Seen.set(PhidxB);
+              continue;
+            }
+          }
+        }
+      }
+
+      std::sort(
+          Groups[Phidx].begin(), Groups[Phidx].end(),
+          [TE](unsigned I1, unsigned I2) {
+            auto U1 = TE.Scalars[I1]->user_begin();
+            if (U1.atEnd())
+              return false;
+            auto U2 = TE.Scalars[I2]->user_begin();
+            if (U2.atEnd())
+              return true;
+            
+            if (auto *FirstUserOfPhi1 =
+                    dyn_cast<InsertElementInst>(*(U1))) {
+              if (auto *FirstUserOfPhi2 = dyn_cast<InsertElementInst>(
+                      *(U2))) {
+                return getElementIndex(FirstUserOfPhi1) <
+                       getElementIndex(FirstUserOfPhi2);
+              }
+            }
+
+            if (auto *FirstUserOfPhi1 =
+                    dyn_cast<ExtractElementInst>(*(U1))) {
+              if (auto *FirstUserOfPhi2 = dyn_cast<ExtractElementInst>(
+                      *(U2))) {
+                return FirstUserOfPhi1->getIndexOperand() <
+                       FirstUserOfPhi2->getIndexOperand();
+              }
+            }
+
+            llvm_unreachable(
+                "Found something other than InsertElement or ExtractElement");
+          });
+    }
+
+    // Sort the groups on the basis of their ordering in the block.
+    std::sort(Groups.begin(), Groups.end(),
+              [TE](SmallVector<unsigned> I1, SmallVector<unsigned> I2) {
+                if (I1.empty())
+                  return false;
+                if (I2.empty())
+                  return true;
+
+                auto PhidxA = I1.front();
+                auto *InstA = TE.Scalars[PhidxA];
+                auto *Phi1 = cast<Instruction>(InstA);
+
+                auto PhidxB = I2.front();
+                auto *InstB = TE.Scalars[PhidxB];
+                auto *Phi2 = cast<Instruction>(InstB);
+
+                return Phi1->comesBefore(Phi2);
+              });
+
     OrdersType ResOrder(TE.Scalars.size());
     for (unsigned Id = 0, Sz = TE.Scalars.size(); Id < Sz; ++Id)
       PhiToId[Id] = Id;
-    stable_sort(Phis, PHICompare);
+
+    Phis.clear();
+    for (auto const &group : Groups) {
+      for (auto const element : group)
+        Phis.push_back(element);
+    }
+
     for (unsigned Id = 0, Sz = Phis.size(); Id < Sz; ++Id)
       ResOrder[Id] = PhiToId[Phis[Id]];
     if (isIdentityOrder(ResOrder))
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/landing_pad.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/landing_pad.ll
index e4b6c06b79fc1b..4b99e2a8ff7958 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/landing_pad.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/landing_pad.ll
@@ -8,7 +8,7 @@
 ; YAML-NEXT: Function:        foo
 ; YAML-NEXT: Args:
 ; YAML-NEXT:   - String:          'SLP vectorized with cost '
-; YAML-NEXT:   - Cost:            '2'
+; YAML-NEXT:   - Cost:            '0'
 ; YAML-NEXT:   - String:          ' and with tree size '
 ; YAML-NEXT:   - TreeSize:        '2'
 
@@ -28,7 +28,7 @@
 ; YAML-NEXT: Function:        foo
 ; YAML-NEXT: Args:
 ; YAML-NEXT:   - String:          'SLP vectorized with cost '
-; YAML-NEXT:   - Cost:            '2'
+; YAML-NEXT:   - Cost:            '3'
 ; YAML-NEXT:   - String:          ' and with tree size '
 ; YAML-NEXT:   - TreeSize:        '9'
 
@@ -45,36 +45,37 @@ define void @foo() personality ptr @bar {
 ; CHECK:       bb3:
 ; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x i64> [ [[TMP4:%.*]], [[BB6:%.*]] ], [ poison, [[BB1:%.*]] ]
 ; CHECK-NEXT:    [[TMP3:%.*]] = invoke i64 poison(ptr addrspace(1) nonnull poison, i64 0, i64 0, i64 poison) [ "deopt"() ]
-; CHECK-NEXT:    to label [[BB4:%.*]] unwind label [[BB10:%.*]]
+; CHECK-NEXT:            to label [[BB4:%.*]] unwind label [[BB10:%.*]]
 ; CHECK:       bb4:
 ; CHECK-NEXT:    br i1 poison, label [[BB11:%.*]], label [[BB5:%.*]]
 ; CHECK:       bb5:
 ; CHECK-NEXT:    br label [[BB7:%.*]]
 ; CHECK:       bb6:
-; CHECK-NEXT:    [[TMP4]] = phi <2 x i64> [ <i64 0, i64 poison>, [[BB8:%.*]] ]
+; CHECK-NEXT:    [[TMP4]] = phi <2 x i64> [ <i64 poison, i64 0>, [[BB8:%.*]] ]
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb7:
 ; CHECK-NEXT:    [[LOCAL_5_84111:%.*]] = phi i64 [ poison, [[BB8]] ], [ poison, [[BB5]] ]
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> poison, i64 [[LOCAL_5_84111]], i32 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = invoke i64 poison(ptr addrspace(1) nonnull poison, i64 poison, i64 poison, i64 poison) [ "deopt"() ]
-; CHECK-NEXT:    to label [[BB8]] unwind label [[BB12:%.*]]
+; CHECK-NEXT:            to label [[BB8]] unwind label [[BB12:%.*]]
 ; CHECK:       bb8:
 ; CHECK-NEXT:    br i1 poison, label [[BB7]], label [[BB6]]
 ; CHECK:       bb9:
 ; CHECK-NEXT:    [[INDVARS_IV528799:%.*]] = phi i64 [ poison, [[BB10]] ], [ poison, [[BB12]] ]
-; CHECK-NEXT:    [[TMP7]] = phi <2 x i64> [ [[TMP8:%.*]], [[BB10]] ], [ [[TMP9:%.*]], [[BB12]] ]
+; CHECK-NEXT:    [[TMP7]] = phi <2 x i64> [ [[TMP9:%.*]], [[BB10]] ], [ [[TMP10:%.*]], [[BB12]] ]
 ; CHECK-NEXT:    br label [[BB2]]
 ; CHECK:       bb10:
-; CHECK-NEXT:    [[TMP8]] = phi <2 x i64> [ [[TMP2]], [[BB3]] ]
+; CHECK-NEXT:    [[TMP8:%.*]] = phi <2 x i64> [ [[TMP2]], [[BB3]] ]
 ; CHECK-NEXT:    [[LANDING_PAD68:%.*]] = landingpad { ptr, i64 }
-; CHECK-NEXT:    cleanup
+; CHECK-NEXT:            cleanup
+; CHECK-NEXT:    [[TMP9]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> poison, <2 x i32> <i32 1, i32 0>
 ; CHECK-NEXT:    br label [[BB9]]
 ; CHECK:       bb11:
 ; CHECK-NEXT:    ret void
 ; CHECK:       bb12:
-; CHECK-NEXT:    [[TMP9]] = phi <2 x i64> [ [[TMP5]], [[BB7]] ]
+; CHECK-NEXT:    [[TMP10]] = phi <2 x i64> [ [[TMP5]], [[BB7]] ]
 ; CHECK-NEXT:    [[LANDING_PAD149:%.*]] = landingpad { ptr, i64 }
-; CHECK-NEXT:    cleanup
+; CHECK-NEXT:            cleanup
 ; CHECK-NEXT:    br label [[BB9]]
 ;
 bb1:
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
index dbc4f3d59d4f9b..6f63b2a8f9aac1 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
@@ -13,6 +13,7 @@ define void @test() {
 ; CHECK-NEXT:    br label %[[BB64]]
 ; CHECK:       [[BB64]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x float> [ poison, %[[BB61]] ], [ poison, %[[BB63]] ], [ poison, %[[BB62]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 ; CHECK-NEXT:    [[I66:%.*]] = load float, ptr poison, align 16
 ; CHECK-NEXT:    [[I67:%.*]] = load float, ptr poison, align 4
 ; CHECK-NEXT:    [[I68:%.*]] = load float, ptr poison, align 8
@@ -25,48 +26,45 @@ define void @test() {
 ; CHECK-NEXT:    [[I75:%.*]] = load float, ptr poison, align 16
 ; CHECK-NEXT:    [[I76:%.*]] = load float, ptr poison, align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <16 x float> poison, float [[I76]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x float> [[TMP1]], float [[I75]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <16 x float> [[TMP2]], float [[I74]], i32 2
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <16 x float> [[TMP3]], float [[I73]], i32 3
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <16 x float> [[TMP4]], float [[I71]], i32 4
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <16 x float> [[TMP5]], float [[I70]], i32 5
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <16 x float> [[TMP6]], float [[I68]], i32 6
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <16 x float> [[TMP7]], float [[I66]], i32 7
-; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <16 x float> [[TMP8]], float [[I72]], i32 13
-; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <16 x float> [[TMP9]], float [[I69]], i32 14
-; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <16 x float> [[TMP10]], float [[I67]], i32 15
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <16 x float> [[TMP1]], float [[I75]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <16 x float> [[TMP3]], float [[I74]], i32 4
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <16 x float> [[TMP4]], float [[I73]], i32 5
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <16 x float> [[TMP5]], float [[I72]], i32 6
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <16 x float> [[TMP6]], float [[I71]], i32 7
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <16 x float> [[TMP7]], float [[I70]], i32 8
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <16 x float> [[TMP8]], float [[I69]], i32 10
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <16 x float> [[TMP9]], float [[I68]], i32 13
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <16 x float> [[TMP10]], float [[I67]], i32 14
+; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <16 x float> [[TMP11]], float [[I66]], i32 15
 ; CHECK-NEXT:    br i1 poison, label %[[BB167:.*]], label %[[BB77:.*]]
 ; CHECK:       [[BB77]]:
-; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> <i32 poison, i32 5, i32 6, i32 7, i32 15, i32 15, i32 14, i32 15>
-; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 1, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <16 x float> [[TMP12]], <16 x float> poison, <8 x i32> <i32 poison, i32 15, i32 8, i32 13, i32 10, i32 14, i32 14, i32 14>
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 0, i32 1, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    br label %[[BB78:.*]]
 ; CHECK:       [[BB78]]:
-; CHECK-NEXT:    [[TMP15:%.*]] = phi <8 x float> [ [[TMP12]], %[[BB77]] ], [ [[TMP30:%.*]], %[[BB78]] ]
-; CHECK-NEXT:    [[TMP16:%.*]] = phi <2 x float> [ poison, %[[BB77]] ], [ [[TMP31:%.*]], %[[BB78]] ]
-; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 3, i32 1, i32 2, i32 3, i32 0, i32 2, i32 3, i32 2, i32 7, i32 2, i32 3, i32 0, i32 6, i32 7, i32 7>
+; CHECK-NEXT:    [[TMP15:%.*]] = phi <8 x float> [ [[TMP14]], %[[BB77]] ], [ [[TMP31:%.*]], %[[BB78]] ]
+; CHECK-NEXT:    [[TMP16:%.*]] = phi <2 x float> [ poison, %[[BB77]] ], [ [[TMP32:%.*]], %[[BB78]] ]
+; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 3, i32 5, i32 1, i32 2, i32 3, i32 4, i32 1, i32 0, i32 3, i32 5, i32 1, i32 0, i32 3, i32 5, i32 1>
 ; CHECK-NEXT:    [[TMP18:%.*]] = fmul fast <16 x float> [[TMP17]], [[TMP13]]
-; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 1, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 7, i32 6, i32 6>
+; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 2, i32 poison, i32 4, i32 poison, i32 0, i32 poison, i32 5, i32 poison, i32 2, i32 poison, i32 4, i32 poison, i32 2, i32 poison, i32 4, i32 poison>
 ; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <16 x float> [[TMP19]], <16 x float> [[TMP20]], <16 x i32> <i32 0, i32 17, i32 2, i32 16, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <16 x float> [[TMP21]], <16 x float> [[TMP22]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 17, i32 6, i32 7, i32 8, i32 22, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <16 x float> [[TMP23]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 5, i32 3, i32 1, i32 3, i32 9, i32 3, i32 1, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[TMP25:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v2f32(<16 x float> [[TMP14]], <2 x float> [[TMP0]], i64 2)
+; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <16 x float> [[TMP19]], <16 x float> [[TMP20]], <16 x i32> <i32 0, i32 16, i32 2, i32 17, i32 4, i32 poison, i32 6, i32 poison, i32 8, i32 poison, i32 10, i32 poison, i32 12, i32 poison, i32 14, i32 poison>
+; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <16 x float> [[TMP21]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 1, i32 6, i32 3, i32 8, i32 1, i32 10, i32 3, i32 12, i32 1, i32 14, i32 3>
+; CHECK-NEXT:    [[TMP25:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> poison, <4 x float> [[TMP2]], i64 4)
 ; CHECK-NEXT:    [[TMP26:%.*]] = fmul fast <16 x float> [[TMP24]], [[TMP25]]
 ; CHECK-NEXT:    [[TMP27:%.*]] = fadd fast <16 x float> [[TMP26]], [[TMP18]]
 ; CHECK-NEXT:    [[TMP28:%.*]] = fadd fast <16 x float> [[TMP27]], poison
 ; CHECK-NEXT:    [[TMP29:%.*]] = fadd fast <16 x float> [[TMP28]], poison
-; CHECK-NEXT:    [[TMP30]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> <i32 12, i32 5, i32 6, i32 7, i32 15, i32 15, i32 14, i32 15>
-; CHECK-NEXT:    [[TMP31]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <2 x i32> <i32 10, i32 11>
+; CHECK-NEXT:    [[TMP31]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> <i32 12, i32 15, i32 8, i32 13, i32 10, i32 14, i32 14, i32 14>
+; CHECK-NEXT:    [[TMP32]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <2 x i32> <i32 9, i32 11>
 ; CHECK-NEXT:    br i1 poison, label %[[BB78]], label %[[BB167]]
 ; CHECK:       [[BB167]]:
-; CHECK-NEXT:    [[TMP32:%.*]] = phi <16 x float> [ [[TMP11]], %[[BB64]] ], [ [[TMP29]], %[[BB78]] ]
-; CHECK-NEXT:    [[TMP33:%.*]] = extractelement <16 x float> [[TMP32]], i32 15
+; CHECK-NEXT:    [[TMP30:%.*]] = phi <16 x float> [ [[TMP12]], %[[BB64]] ], [ [[TMP29]], %[[BB78]] ]
+; CHECK-NEXT:    [[TMP33:%.*]] = extractelement <16 x float> [[TMP30]], i32 14
 ; CHECK-NEXT:    store float [[TMP33]], ptr poison, align 1
-; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <16 x float> [[TMP32]], i32 13
+; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <16 x float> [[TMP30]], i32 6
 ; CHECK-NEXT:    store float [[TMP34]], ptr poison, align 1
-; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <16 x float> [[TMP32]], i32 14
+; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <16 x float> [[TMP30]], i32 10
 ; CHECK-NEXT:    br i1 poison, label %[[BB186:.*]], label %[[BB184:.*]]
 ; CHECK:       [[BB184]]:
 ; CHECK-NEXT:    br label %[[BB185:.*]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll
index 43c42c1ea2bfb5..0b948d78821837 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll
@@ -13,11 +13,11 @@ define void @test() {
 ; CHECK:       [[BB6]]:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP8:%.*]], %[[BB6]] ]
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 poison, i32 poison>, <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 5, i32 4>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 poison, i32 poison>, <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 ; CHECK-NEXT:    [[TMP3:%.*]] = ashr <4 x i32> zeroinitializer, [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul <4 x i32> zeroinitializer, [[TMP2]]
 ; CHECK-NEXT:    [[TMP5]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> <i32 poison, i32 0>, <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    [[TMP8]] = mul <2 x i32> zeroinitializer, [[TMP7]]
 ; CHECK-NEXT:    br i1 false, label %[[BB2]], label %[[BB6]]
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_clear_undefs.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_clear_undefs.ll
index de99654d84eb81..749257a9ddc38f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_clear_undefs.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_clear_undefs.ll
@@ -25,7 +25,7 @@ define i1 @foo() {
 ; CHECK-NEXT:    [[TMP7:%.*]] = select <4 x i1> zeroinitializer, <4 x float> [[TMP5]], <4 x float> [[TMP6]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = fsub <4 x float> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP9]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
 ; CHECK-NEXT:    br label [[TMP11]]
 ; CHECK:       11:
 ; CHECK-NEXT:    [[TMP12:%.*]] = phi <4 x float> [ [[TMP10]], [[TMP2]] ], [ zeroinitializer, [[TMP0:%.*]] ]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
index 2f69a01bb5e370..e63840ebf8f7fa 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
@@ -13,19 +13,19 @@ define void @_foo(double %p1, double %p2, double %p3) #0 {
 ; CHECK:       bb1:
 ; CHECK-NEXT:    [[MUL20:%.*]] = fmul double [[P3:%.*]], 1.638400e+04
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[MUL20]], 8.192000e+03
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P2:%.*]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P1:%.*]], i32 1
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P1:%.*]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P2:%.*]], i32 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], <double 1.638400e+04, double 1.638400e+04>
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> <double poison, double 0.000000e+00>, double [[ADD]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> <double 0.000000e+00, double poison>, double [[ADD]], i32 1
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
-; CHECK-NEXT:    [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]])
+; CHECK-NEXT:    [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]])
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB1]], i64 0, i64 [[INDVARS_IV266]]
 ; CHECK-NEXT:    store i32 [[X13]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT:    [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]])
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
+; CHECK-NEXT:    [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]])
 ; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB2]], i64 0, i64 [[INDVARS_IV266]]
 ; CHECK-NEXT:    store i32 [[X14]], ptr [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
index 9b35fcaebadf95..6f3b1f8fa9cd42 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
@@ -13,19 +13,19 @@ define void @_foo(double %p1, double %p2, double %p3) #0 {
 ; CHECK:       bb1:
 ; CHECK-NEXT:    [[MUL20:%.*]] = fmul double [[P3:%.*]], 1.638400e+04
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[MUL20]], 8.192000e+03
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P2:%.*]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P1:%.*]], i32 1
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P1:%.*]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P2:%.*]], i32 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], <double 1.638400e+04, double 1.638400e+04>
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> <double poison, double 0.000000e+00>, double [[ADD]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> <double 0.000000e+00, double poison>, double [[ADD]], i32 1
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
-; CHECK-NEXT:    [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]])
+; CHECK-NEXT:    [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]])
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB1]], i64 0, i64 [[INDVARS_IV266]]
 ; CHECK-NEXT:    store i32 [[X13]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT:    [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]])
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
+; CHECK-NEXT:    [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]])
 ; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB2]], i64 0, i64 [[INDVARS_IV266]]
 ; CHECK-NEXT:    store i32 [[X14]], ptr [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll
index 1c0fa1d85f9e4a..d4d5be89532a13 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll
@@ -27,23 +27,24 @@ define void @SIM4() {
 ; CHECK:       land.rhs.lr.ph:
 ; CHECK-NEXT:    unreachable
 ; CHECK:       if.end98:
+; CHECK-NEXT:    [[FROM299:%.*]] = getelementptr inbounds [[STRUCT__EXON_T_12_103_220_363_480_649_740_857_1039_1065_1078_1091_1117_1130_1156_1169_1195_1221_1234_1286_1299_1312_1338_1429_1455_1468_1494_1520_1884_1897_1975_2066_2105_2170_2171:%.*]], ptr undef, i64 0, i32 1
 ; CHECK-NEXT:    br i1 undef, label [[LAND_LHS_TRUE167]], label [[IF_THEN103:%.*]]
 ; CHECK:       if.then103:
 ; CHECK-NEXT:    [[DOTSUB100:%.*]] = select i1 undef, i32 250, i32 undef
 ; CHECK-NEXT:    [[MUL114:%.*]] = shl nsw i32 [[DOTSUB100]], 2
 ; CHECK-NEXT:    [[COND125:%.*]] = select i1 undef, i32 undef, i32 [[MUL114]]
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[COND125]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[DOTSUB100]], i32 1
 ; CHECK-NEXT:    br label [[FOR_COND_I:%.*]]
 ; CHECK:       for.cond.i:
-; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x i32> [ undef, [[LAND_RHS_I874:%.*]] ], [ [[TMP1]], [[IF_THEN103]] ]
+; CHECK-NEXT:    [[ROW_0_I:%.*]] = phi i32 [ undef, [[LAND_RHS_I874:%.*]] ], [ [[DOTSUB100]], [[IF_THEN103]] ]
+; CHECK-NEXT:    [[COL_0_I:%.*]] = phi i32 [ undef, [[LAND_RHS_I874]] ], [ [[COND125]], [[IF_THEN103]] ]
 ; CHECK-NEXT:    br i1 undef, label [[LAND_RHS_I874]], label [[FOR_END_I:%.*]]
 ; CHECK:       land.rhs.i874:
 ; CHECK-NEXT:    br i1 undef, label [[FOR_COND_I]], label [[FOR_END_I]]
 ; CHECK:       for.end.i:
 ; CHECK-NEXT:    br i1 undef, label [[IF_THEN_I:%.*]], label [[IF_END_I:%.*]]
 ; CHECK:       if.then.i:
-; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], undef
+; CHECK-NEXT:    [[ADD14_I:%.*]] = add nsw i32 [[ROW_0_I]], undef
+; CHECK-NEXT:    [[ADD15_I:%.*]] = add nsw i32 [[COL_0_I]], undef
 ; CHECK-NEXT:    br label [[EXTEND_BW_EXIT:%.*]]
 ; CHECK:       if.end.i:
 ; CHECK-NEXT:    [[ADD16_I:%.*]] = add i32 [[COND125]], [[DOTSUB100]]
@@ -64,11 +65,14 @@ define void @SIM4() {
 ; CHECK:       while.end275.i:
 ; CHECK-NEXT:    br label [[EXTEND_BW_EXIT]]
 ; CHECK:       extend_bw.exit:
-; CHECK-NEXT:    [[TMP4:%.*]] = phi <2 x i32> [ [[TMP3]], [[IF_THEN_I]] ], [ undef, [[WHILE_END275_I]] ]
+; CHECK-NEXT:    [[ADD14_I1262:%.*]] = phi i32 [ [[ADD14_I]], [[IF_THEN_I]] ], [ undef, [[WHILE_END275_I]] ]
+; CHECK-NEXT:    [[ADD15_I1261:%.*]] = phi i32 [ [[ADD15_I]], [[IF_THEN_I]] ], [ undef, [[WHILE_END275_I]] ]
 ; CHECK-NEXT:    br i1 false, label [[IF_THEN157:%.*]], label [[LAND_LHS_TRUE167]]
 ; CHECK:       if.then157:
-; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i32> [[TMP4]], <i32 1, i32 1>
-; CHECK-NEXT:    store <2 x i32> [[TMP5]], ptr undef, align 4
+; CHECK-NEXT:    [[ADD158:%.*]] = add nsw i32 [[ADD14_I1262]], 1
+; CHECK-NEXT:    store i32 [[ADD158]], ptr [[FROM299]], align 4
+; CHECK-NEXT:    [[ADD160:%.*]] = add nsw i32 [[ADD15_I1261]], 1
+; CHECK-NEXT:    store i32 [[ADD160]], ptr undef, align 4
 ; CHECK-NEXT:    br label [[LAND_LHS_TRUE167]]
 ; CHECK:       land.lhs.true167:
 ; CHECK-NEXT:    unreachable
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-phi-in-landingpad.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-phi-in-landingpad.ll
index 7476c77c583205..31f97647365cdd 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-phi-in-landingpad.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-phi-in-landingpad.ll
@@ -13,7 +13,7 @@ define void @test() personality ptr null {
 ; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ poison, %[[BB2]] ]
 ; CHECK-NEXT:    [[LANDINGPAD:%.*]] = landingpad { ptr, i32 }
 ; CHECK-NEXT:            cleanup
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0
 ; CHECK-NEXT:    call void null(i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]])
 ; CHECK-NEXT:    ret void
 ; CHECK:       [[BB65]]:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
index fa33621de5ae76..575c4bc726f264 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
@@ -16,11 +16,11 @@ define i64 @foo() {
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i64> [ zeroinitializer, [[BB]] ], [ [[TMP7:%.*]], [[BB3]] ]
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
-; CHECK-NEXT:    [[ADD]] = add i64 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    [[ADD]] = add i64 [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 0
 ; CHECK-NEXT:    [[TMP9]] = or i64 [[PHI5]], 0
 ; CHECK-NEXT:    [[ICMP:%.*]] = icmp ult i64 [[TMP9]], 0
-; CHECK-NEXT:    [[TMP7]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[ADD]], i32 0
+; CHECK-NEXT:    [[TMP7]] = insertelement <2 x i64> <i64 0, i64 poison>, i64 [[ADD]], i32 1
 ; CHECK-NEXT:    br i1 false, label [[BB3]], label [[BB1:%.*]]
 ;
 ; FORCED-LABEL: define i64 @foo() {
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll b/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll
index e0d7c12f70c2ef..4168cdb935a5c9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll
@@ -13,7 +13,7 @@ define dso_local i32 @g() local_unnamed_addr {
 ; CHECK:       while.body:
 ; CHECK-NEXT:    [[A_020:%.*]] = phi ptr [ [[A_020_BE:%.*]], [[WHILE_BODY_BACKEDGE:%.*]] ], [ undef, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x ptr> [ [[TMP14:%.*]], [[WHILE_BODY_BACKEDGE]] ], [ undef, [[ENTRY]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
@@ -24,7 +24,7 @@ define dso_local i32 @g() local_unnamed_addr {
 ; CHECK-NEXT:      i32 4, label [[SW_BB6:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       sw.bb:
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP6]] to i64
 ; CHECK-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
 ; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[A_020]], i64 2
@@ -36,7 +36,7 @@ define dso_local i32 @g() local_unnamed_addr {
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[INCDEC_PTR]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = trunc i64 [[TMP10]] to i32
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> <i64 2, i64 2>
-; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 0
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 1
 ; CHECK-NEXT:    store i32 [[TMP11]], ptr [[TMP13]], align 4
 ; CHECK-NEXT:    br label [[WHILE_BODY_BACKEDGE]]
 ; CHECK:       while.body.backedge:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll
index 47b42bc8f32a7d..d33231439f87f6 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll
@@ -21,7 +21,7 @@ define void @foo() personality ptr @bar {
 ; CHECK:       bb5:
 ; CHECK-NEXT:    br label [[BB7:%.*]]
 ; CHECK:       bb6:
-; CHECK-NEXT:    [[TMP3]] = phi <2 x i32> [ <i32 0, i32 poison>, [[BB8:%.*]] ]
+; CHECK-NEXT:    [[TMP3]] = phi <2 x i32> [ <i32 poison, i32 0>, [[BB8:%.*]] ]
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb7:
 ; CHECK-NEXT:    [[LOCAL_5_84111:%.*]] = phi i32 [ poison, [[BB8]] ], [ poison, [[BB5]] ]
@@ -32,7 +32,7 @@ define void @foo() personality ptr @bar {
 ; CHECK-NEXT:    br i1 poison, label [[BB7]], label [[BB6]]
 ; CHECK:       bb9:
 ; CHECK-NEXT:    [[INDVARS_IV528799:%.*]] = phi i64 [ poison, [[BB10]] ], [ poison, [[BB12]] ]
-; CHECK-NEXT:    [[TMP6:%.*]] = phi <2 x i32> [ [[TMP8:%.*]], [[BB10]] ], [ [[TMP9:%.*]], [[BB12]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = phi <2 x i32> [ [[TMP8:%.*]], [[BB10]] ], [ [[TMP10:%.*]], [[BB12]] ]
 ; CHECK-NEXT:    [[TMP7]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> <i32 poison, i32 poison, i32 1, i32 0>
 ; CHECK-NEXT:    br label [[BB2]]
 ; CHECK:       bb10:
@@ -43,9 +43,10 @@ define void @foo() personality ptr @bar {
 ; CHECK:       bb11:
 ; CHECK-NEXT:    ret void
 ; CHECK:       bb12:
-; CHECK-NEXT:    [[TMP9]] = phi <2 x i32> [ [[TMP4]], [[BB7]] ]
+; CHECK-NEXT:    [[TMP9:%.*]] = phi <2 x i32> [ [[TMP4]], [[BB7]] ]
 ; CHECK-NEXT:    [[LANDING_PAD149:%.*]] = landingpad { ptr, i32 }
 ; CHECK-NEXT:            cleanup
+; CHECK-NEXT:    [[TMP10]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
 ; CHECK-NEXT:    br label [[BB9]]
 ;
 bb1:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll
index e5d7ad138b4def..0c6cb4e9a21b37 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll
@@ -8,7 +8,7 @@
 ; YAML: Function:        test
 ; YAML: Args:
 ; YAML:   - String:          'Stores SLP vectorized with cost '
-; YAML:   - Cost:            '-6'
+; YAML:   - Cost:            '-5'
 ; YAML:   - String:          ' and with tree size '
 ; YAML:   - TreeSize:        '14'
 ; YAML: ...
@@ -34,10 +34,10 @@ define void @test(ptr %dst, float %a, float %b, float %c, float %d) {
 ; CHECK-NEXT:    [[CALL1:%.*]] = call fast float @foo(float [[B]])
 ; CHECK-NEXT:    [[CALL2:%.*]] = call fast float @foo(float [[C]])
 ; CHECK-NEXT:    [[CALL3:%.*]] = call fast float @foo(float [[D]])
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[CALL0]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[CALL1]], i32 1
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float [[CALL2]], i32 2
-; CHECK-NEXT:    [[TMP5]] = insertelement <4 x float> [[TMP4]], float [[CALL3]], i32 3
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[CALL3]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[CALL2]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float [[CALL1]], i32 2
+; CHECK-NEXT:    [[TMP5]] = insertelement <4 x float> [[TMP4]], float [[CALL0]], i32 3
 ; CHECK-NEXT:    br i1 poison, label [[USERBLOCK0]], label [[USERBLOCK1]]
 ; CHECK:       UserBlock1:
 ; CHECK-NEXT:    [[TMP6:%.*]] = phi <4 x float> [ [[TMP5]], [[BLKX]] ], [ [[TMP9:%.*]], [[LOOP_INNER]] ]
@@ -51,7 +51,8 @@ define void @test(ptr %dst, float %a, float %b, float %c, float %d) {
 ; CHECK-NEXT:    [[TMP10:%.*]] = phi <4 x float> [ <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>, [[ENTRY]] ], [ [[TMP7]], [[USERBLOCK1]] ]
 ; CHECK-NEXT:    [[IDX0:%.*]] = add i64 0, poison
 ; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IDX0]]
-; CHECK-NEXT:    store <4 x float> [[TMP10]], ptr [[GEP0]], align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    store <4 x float> [[TMP11]], ptr [[GEP0]], align 4
 ; CHECK-NEXT:    ret void
 ;
 Entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-nodes-as-operand-reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi-nodes-as-operand-reorder.ll
index 51ce970bf06bc8..e16ff250a54c44 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/phi-nodes-as-operand-reorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/phi-nodes-as-operand-reorder.ll
@@ -9,7 +9,7 @@ define void @test() {
 ; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP3:%.*]], %[[BB3:.*]] ]
 ; CHECK-NEXT:    br i1 false, label %[[BB6:.*]], label %[[BB3]]
 ; CHECK:       [[BB3]]:
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> <i32 poison, i32 0>, <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i32> zeroinitializer, [[TMP1]]
 ; CHECK-NEXT:    [[TMP3]] = add <2 x i32> zeroinitializer, [[TMP1]]
 ; CHECK-NEXT:    br i1 false, label %[[BB6]], label %[[BB1]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduced-value-replace-extractelement.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduced-value-replace-extractelement.ll
index 5cbf78435233bb..dd6c88d2406f03 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduced-value-replace-extractelement.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduced-value-replace-extractelement.ll
@@ -8,12 +8,12 @@ define void @test() {
 ; CHECK-NEXT:    br label %[[BB1:.*]]
 ; CHECK:       [[BB1]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP4:%.*]], %[[BB1]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> zeroinitializer)
 ; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP2]] to i32
 ; CHECK-NEXT:    [[OP_RDX:%.*]] = mul i32 [[TMP3]], [[TMP1]]
 ; CHECK-NEXT:    [[OP_RDX1:%.*]] = mul i32 [[OP_RDX]], [[TRUNC]]
-; CHECK-NEXT:    [[TMP4]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[OP_RDX1]], i32 1
+; CHECK-NEXT:    [[TMP4]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[OP_RDX1]], i32 0
 ; CHECK-NEXT:    br label %[[BB1]]
 ;
 bb:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll b/llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll
index ce6a477f30a085..95bfa91e1d5f3d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll
@@ -6,15 +6,15 @@ define i32 @main(i32 %0) {
 ; CHECK-NEXT:  for.cond.preheader:
 ; CHECK-NEXT:    br i1 false, label [[FOR_END:%.*]], label [[FOR_INC_PREHEADER:%.*]]
 ; CHECK:       for.inc.preheader:
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison, i32 undef>, i32 [[TMP0:%.*]], i32 6
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <8 x i32> <i32 poison, i32 0, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0>, i32 [[TMP0:%.*]], i32 0
 ; CHECK-NEXT:    br i1 false, label [[FOR_END]], label [[L1_PREHEADER:%.*]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    [[DOTPR:%.*]] = phi i32 [ 0, [[FOR_INC_PREHEADER]] ], [ 0, [[FOR_COND_PREHEADER:%.*]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[DOTPR]], i32 0
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[L1_PREHEADER]]
 ; CHECK:       L1.preheader:
-; CHECK-NEXT:    [[TMP3:%.*]] = phi <8 x i32> [ [[SHUFFLE]], [[FOR_END]] ], [ [[TMP1]], [[FOR_INC_PREHEADER]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi <8 x i32> [ [[TMP3]], [[FOR_END]] ], [ [[TMP1]], [[FOR_INC_PREHEADER]] ]
 ; CHECK-NEXT:    ret i32 0
 ;
 for.cond.preheader:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/value-bug-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/value-bug-inseltpoison.ll
index eb5e218f057ce0..c44a7f9e402dc4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/value-bug-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/value-bug-inseltpoison.ll
@@ -31,7 +31,7 @@ define void @test() {
 ; CHECK-NEXT:    br i1 undef, label [[BB32_I]], label [[BB21_I]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[TMP7:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double>
-; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], <double undef, double 0.000000e+00>
+; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], <double 0.000000e+00, double undef>
 ; CHECK-NEXT:    [[TMP9:%.*]] = fadd <2 x double> undef, [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = fadd <2 x double> [[TMP9]], undef
 ; CHECK-NEXT:    [[TMP11]] = fptrunc <2 x double> [[TMP10]] to <2 x float>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll b/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll
index f870cb44f4e5f6..86817f0472312e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll
@@ -31,7 +31,7 @@ define void @test() {
 ; CHECK-NEXT:    br i1 undef, label [[BB32_I]], label [[BB21_I]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[TMP7:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double>
-; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], <double undef, double 0.000000e+00>
+; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], <double 0.000000e+00, double undef>
 ; CHECK-NEXT:    [[TMP9:%.*]] = fadd <2 x double> undef, [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = fadd <2 x double> [[TMP9]], undef
 ; CHECK-NEXT:    [[TMP11]] = fptrunc <2 x double> [[TMP10]] to <2 x float>
diff --git a/llvm/test/Transforms/SLPVectorizer/extracts-with-undefs.ll b/llvm/test/Transforms/SLPVectorizer/extracts-with-undefs.ll
index dca34b681032c4..e9816f53cf2080 100644
--- a/llvm/test/Transforms/SLPVectorizer/extracts-with-undefs.ll
+++ b/llvm/test/Transforms/SLPVectorizer/extracts-with-undefs.ll
@@ -1,32 +1,58 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: %if x86-registered-target %{ opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s %}
-; RUN: %if aarch64-registered-target %{ opt < %s -passes=slp-vectorizer -S -mtriple=aarch64-unknown-linux-gnu | FileCheck %s %}
+; RUN: %if x86-registered-target %{ opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu | FileCheck --check-prefix=X86 %s %}
+; RUN: %if aarch64-registered-target %{ opt < %s -passes=slp-vectorizer -S -mtriple=aarch64-unknown-linux-gnu | FileCheck --check-prefix=AARCH %s %}
 
 define void @test() {
-; CHECK-LABEL: @test(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[BODY:%.*]]
-; CHECK:       body:
-; CHECK-NEXT:    [[PHI1:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ 0.000000e+00, [[BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x double> [ zeroinitializer, [[ENTRY]] ], [ zeroinitializer, [[BODY]] ]
-; CHECK-NEXT:    [[MUL_I478_I:%.*]] = fmul fast double [[PHI1]], 0.000000e+00
-; CHECK-NEXT:    [[MUL7_I485_I:%.*]] = fmul fast double undef, 0.000000e+00
-; CHECK-NEXT:    [[ADD8_I_I:%.*]] = fadd fast double [[MUL_I478_I]], [[MUL7_I485_I]]
-; CHECK-NEXT:    [[CMP42_I:%.*]] = fcmp fast ole double [[ADD8_I_I]], 0.000000e+00
-; CHECK-NEXT:    br i1 false, label [[BODY]], label [[EXIT:%.*]]
-; CHECK:       exit:
-; CHECK-NEXT:    br i1 false, label [[IF_THEN135_I:%.*]], label [[IF_END209_I:%.*]]
-; CHECK:       if.then135.i:
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp fast olt <2 x double> [[TMP0]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i1> <i1 poison, i1 false>, <2 x i1> [[TMP1]], <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x double> zeroinitializer, <2 x double> zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = fmul fast <2 x double> zeroinitializer, [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast <2 x double> [[TMP4]], zeroinitializer
-; CHECK-NEXT:    [[TMP6:%.*]] = fadd fast <2 x double> [[TMP5]], zeroinitializer
-; CHECK-NEXT:    br label [[IF_END209_I]]
-; CHECK:       if.end209.i:
-; CHECK-NEXT:    [[TMP7:%.*]] = phi <2 x double> [ [[TMP6]], [[IF_THEN135_I]] ], [ zeroinitializer, [[EXIT]] ]
-; CHECK-NEXT:    ret void
+; X86-LABEL: @test(
+; X86-NEXT:  entry:
+; X86-NEXT:    br label [[BODY:%.*]]
+; X86:       body:
+; X86-NEXT:    [[PHI1:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ 0.000000e+00, [[BODY]] ]
+; X86-NEXT:    [[TMP0:%.*]] = phi <2 x double> [ zeroinitializer, [[ENTRY]] ], [ zeroinitializer, [[BODY]] ]
+; X86-NEXT:    [[MUL_I478_I:%.*]] = fmul fast double [[PHI1]], 0.000000e+00
+; X86-NEXT:    [[MUL7_I485_I:%.*]] = fmul fast double undef, 0.000000e+00
+; X86-NEXT:    [[ADD8_I_I:%.*]] = fadd fast double [[MUL_I478_I]], [[MUL7_I485_I]]
+; X86-NEXT:    [[CMP42_I:%.*]] = fcmp fast ole double [[ADD8_I_I]], 0.000000e+00
+; X86-NEXT:    br i1 false, label [[BODY]], label [[EXIT:%.*]]
+; X86:       exit:
+; X86-NEXT:    br i1 false, label [[IF_THEN135_I:%.*]], label [[IF_END209_I:%.*]]
+; X86:       if.then135.i:
+; X86-NEXT:    [[TMP1:%.*]] = fcmp fast olt <2 x double> [[TMP0]], zeroinitializer
+; X86-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i1> <i1 poison, i1 false>, <2 x i1> [[TMP1]], <2 x i32> <i32 3, i32 1>
+; X86-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x double> zeroinitializer, <2 x double> zeroinitializer
+; X86-NEXT:    [[TMP4:%.*]] = fmul fast <2 x double> zeroinitializer, [[TMP3]]
+; X86-NEXT:    [[TMP5:%.*]] = fmul fast <2 x double> [[TMP4]], zeroinitializer
+; X86-NEXT:    [[TMP6:%.*]] = fadd fast <2 x double> [[TMP5]], zeroinitializer
+; X86-NEXT:    br label [[IF_END209_I]]
+; X86:       if.end209.i:
+; X86-NEXT:    [[TMP7:%.*]] = phi <2 x double> [ [[TMP6]], [[IF_THEN135_I]] ], [ zeroinitializer, [[EXIT]] ]
+; X86-NEXT:    ret void
+;
+; AARCH-LABEL: @test(
+; AARCH-NEXT:  entry:
+; AARCH-NEXT:    br label [[BODY:%.*]]
+; AARCH:       body:
+; AARCH-NEXT:    [[PHI1:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ 0.000000e+00, [[BODY]] ]
+; AARCH-NEXT:    [[PHI2:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ 0.000000e+00, [[BODY]] ]
+; AARCH-NEXT:    [[MUL_I478_I:%.*]] = fmul fast double [[PHI1]], 0.000000e+00
+; AARCH-NEXT:    [[MUL7_I485_I:%.*]] = fmul fast double undef, 0.000000e+00
+; AARCH-NEXT:    [[ADD8_I_I:%.*]] = fadd fast double [[MUL_I478_I]], [[MUL7_I485_I]]
+; AARCH-NEXT:    [[CMP42_I:%.*]] = fcmp fast ole double [[ADD8_I_I]], 0.000000e+00
+; AARCH-NEXT:    br i1 false, label [[BODY]], label [[EXIT:%.*]]
+; AARCH:       exit:
+; AARCH-NEXT:    br i1 false, label [[IF_THEN135_I:%.*]], label [[IF_END209_I:%.*]]
+; AARCH:       if.then135.i:
+; AARCH-NEXT:    [[CMP145_I:%.*]] = fcmp fast olt double [[PHI1]], 0.000000e+00
+; AARCH-NEXT:    [[CMP152_I:%.*]] = fcmp fast olt double [[PHI2]], 0.000000e+00
+; AARCH-NEXT:    [[TMP0:%.*]] = insertelement <2 x i1> <i1 poison, i1 false>, i1 [[CMP152_I]], i32 0
+; AARCH-NEXT:    [[TMP1:%.*]] = select <2 x i1> [[TMP0]], <2 x double> zeroinitializer, <2 x double> zeroinitializer
+; AARCH-NEXT:    [[TMP2:%.*]] = fmul fast <2 x double> zeroinitializer, [[TMP1]]
+; AARCH-NEXT:    [[TMP3:%.*]] = fmul fast <2 x double> [[TMP2]], zeroinitializer
+; AARCH-NEXT:    [[TMP4:%.*]] = fadd fast <2 x double> [[TMP3]], zeroinitializer
+; AARCH-NEXT:    br label [[IF_END209_I]]
+; AARCH:       if.end209.i:
+; AARCH-NEXT:    [[TMP5:%.*]] = phi <2 x double> [ [[TMP4]], [[IF_THEN135_I]] ], [ zeroinitializer, [[EXIT]] ]
+; AARCH-NEXT:    ret void
 ;
 entry:
   br label %body
diff --git a/llvm/test/Transforms/SLPVectorizer/phi-undef-input.ll b/llvm/test/Transforms/SLPVectorizer/phi-undef-input.ll
index b9802a0adb8aaf..c81d1f3db94b11 100644
--- a/llvm/test/Transforms/SLPVectorizer/phi-undef-input.ll
+++ b/llvm/test/Transforms/SLPVectorizer/phi-undef-input.ll
@@ -159,13 +159,13 @@ define i32 @phi1Undef2PoisonInputs(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %a
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[COND:%.*]], label [[BB2:%.*]], label [[BB3:%.*]]
 ; CHECK:       bb2:
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i8> poison, i8 [[ARG1:%.*]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i8> [[TMP0]], i8 [[ARG0:%.*]], i32 1
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i8> poison, i8 [[ARG0:%.*]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i8> [[TMP0]], i8 [[ARG1:%.*]], i32 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i8> [[TMP1]], i8 [[ARG2:%.*]], i32 2
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 poison, i8 poison, i8 undef>, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 poison, i8 0, i8 poison, i8 undef>, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]])
 ; CHECK-NEXT:    [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
 ; CHECK-NEXT:    ret i32 [[TMP6]]
@@ -196,13 +196,13 @@ define i32 @phi1Undef1PoisonGapInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[COND:%.*]], label [[BB2:%.*]], label [[BB3:%.*]]
 ; CHECK:       bb2:
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i8> poison, i8 [[ARG1:%.*]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i8> [[TMP0]], i8 [[ARG3:%.*]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i8> [[TMP1]], i8 [[ARG0:%.*]], i32 2
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG2:%.*]], i32 3
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i8> poison, i8 [[ARG0:%.*]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i8> [[TMP0]], i8 [[ARG1:%.*]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i8> [[TMP1]], i8 [[ARG2:%.*]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 poison, i8 undef>, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 poison, i8 0, i8 undef, i8 0>, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]])
 ; CHECK-NEXT:    [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
 ; CHECK-NEXT:    ret i32 [[TMP6]]
diff --git a/llvm/test/Transforms/SLPVectorizer/postponed_gathers.ll b/llvm/test/Transforms/SLPVectorizer/postponed_gathers.ll
index f6bed797b9ba91..45be77cc21f140 100644
--- a/llvm/test/Transforms/SLPVectorizer/postponed_gathers.ll
+++ b/llvm/test/Transforms/SLPVectorizer/postponed_gathers.ll
@@ -6,8 +6,8 @@ define void @foo() {
 ; CHECK-LABEL: define void @foo() {
 ; CHECK-NEXT:  bci_0:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(1) null, align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> <i32 1, i32 1>
 ; CHECK-NEXT:    br label [[BCI_252:%.*]]
 ; CHECK:       bci_252:
 ; CHECK-NEXT:    [[TMP3:%.*]] = phi <2 x i32> [ zeroinitializer, [[BCI_0:%.*]] ], [ [[TMP16:%.*]], [[BCI_252_1:%.*]] ]

>From 4f9eb2ed2198ad687b400a2fad3813caf63ce6d9 Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Fri, 4 Oct 2024 16:42:35 +0100
Subject: [PATCH 2/2] Fix formatting

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f21ff5f8825656..df5d81a5466dfb 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5529,20 +5529,16 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
             auto U2 = TE.Scalars[I2]->user_begin();
             if (U2.atEnd())
               return true;
-            
-            if (auto *FirstUserOfPhi1 =
-                    dyn_cast<InsertElementInst>(*(U1))) {
-              if (auto *FirstUserOfPhi2 = dyn_cast<InsertElementInst>(
-                      *(U2))) {
+
+            if (auto *FirstUserOfPhi1 = dyn_cast<InsertElementInst>(*(U1))) {
+              if (auto *FirstUserOfPhi2 = dyn_cast<InsertElementInst>(*(U2))) {
                 return getElementIndex(FirstUserOfPhi1) <
                        getElementIndex(FirstUserOfPhi2);
               }
             }
 
-            if (auto *FirstUserOfPhi1 =
-                    dyn_cast<ExtractElementInst>(*(U1))) {
-              if (auto *FirstUserOfPhi2 = dyn_cast<ExtractElementInst>(
-                      *(U2))) {
+            if (auto *FirstUserOfPhi1 = dyn_cast<ExtractElementInst>(*(U1))) {
+              if (auto *FirstUserOfPhi2 = dyn_cast<ExtractElementInst>(*(U2))) {
                 return FirstUserOfPhi1->getIndexOperand() <
                        FirstUserOfPhi2->getIndexOperand();
               }



More information about the llvm-commits mailing list