[llvm] [SLPVectorizer] Use accurate cost for external users of resize shuffles (PR #137419)

Jeffrey Byrnes via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 12 08:08:52 PDT 2025


https://github.com/jrbyrnes updated https://github.com/llvm/llvm-project/pull/137419

>From 8bbd7f9a0ffbef95beedb66756c5d6eab560510a Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Fri, 25 Apr 2025 12:11:10 -0700
Subject: [PATCH 1/3] [SLPVectorizer] Use accurate cost for external users of
 resize shuffles

Change-Id: I7620d2bd3d65be994bd290b84267832fdb4f1bb4
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    |  54 +++++---
 .../SLPVectorizer/AMDGPU/external-shuffle.ll  | 128 ++++++------------
 .../SLPVectorizer/X86/buildvector-shuffle.ll  |   9 +-
 .../extractelement-single-use-many-nodes.ll   |  11 +-
 .../X86/insertelements-with-reused-indices.ll |  10 +-
 5 files changed, 96 insertions(+), 116 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c3ca22dce0cc4..fd58da64901f3 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -14886,25 +14886,47 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
 
   Cost += ExtractCost;
   auto &&ResizeToVF = [this, &Cost](const TreeEntry *TE, ArrayRef<int> Mask,
-                                    bool) {
+                                    bool ForSingleMask) {
     InstructionCost C = 0;
     unsigned VF = Mask.size();
     unsigned VecVF = TE->getVectorFactor();
-    if (VF != VecVF &&
-        (any_of(Mask, [VF](int Idx) { return Idx >= static_cast<int>(VF); }) ||
-         !ShuffleVectorInst::isIdentityMask(Mask, VF))) {
-      SmallVector<int> OrigMask(VecVF, PoisonMaskElem);
-      std::copy(Mask.begin(), std::next(Mask.begin(), std::min(VF, VecVF)),
-                OrigMask.begin());
-      C = ::getShuffleCost(*TTI, TTI::SK_PermuteSingleSrc,
-                           getWidenedType(TE->getMainOp()->getType(), VecVF),
-                           OrigMask);
-      LLVM_DEBUG(
-          dbgs() << "SLP: Adding cost " << C
-                 << " for final shuffle of insertelement external users.\n";
-          TE->dump(); dbgs() << "SLP: Current total cost = " << Cost << "\n");
-      Cost += C;
-      return std::make_pair(TE, true);
+    bool HasLargeIndex =
+        any_of(Mask, [VF](int Idx) { return Idx >= static_cast<int>(VF); });
+    if ((VF != VecVF && HasLargeIndex) ||
+        !ShuffleVectorInst::isIdentityMask(Mask, VF)) {
+
+      if (HasLargeIndex) {
+        SmallVector<int> OrigMask(VecVF, PoisonMaskElem);
+        std::copy(Mask.begin(), std::next(Mask.begin(), std::min(VF, VecVF)),
+                  OrigMask.begin());
+        C = ::getShuffleCost(*TTI, TTI::SK_PermuteSingleSrc,
+                             getWidenedType(TE->getMainOp()->getType(), VecVF),
+                             OrigMask);
+        LLVM_DEBUG(
+            dbgs() << "SLP: Adding cost " << C
+                   << " for final shuffle of insertelement external users.\n";
+            TE->dump(); dbgs() << "SLP: Current total cost = " << Cost << "\n");
+        Cost += C;
+        return std::make_pair(TE, true);
+      }
+
+      if (!ForSingleMask) {
+        SmallVector<int> ResizeMask(VF, PoisonMaskElem);
+        for (unsigned I = 0; I < VF; ++I) {
+          if (Mask[I] != PoisonMaskElem)
+            ResizeMask[Mask[I]] = Mask[I];
+        }
+        if (!ShuffleVectorInst::isIdentityMask(Mask, VF))
+          C = ::getShuffleCost(
+              *TTI, TTI::SK_PermuteSingleSrc,
+              getWidenedType(TE->getMainOp()->getType(), VecVF), ResizeMask);
+        LLVM_DEBUG(
+            dbgs() << "SLP: Adding cost " << C
+                   << " for final shuffle of insertelement external users.\n";
+            TE->dump(); dbgs() << "SLP: Current total cost = " << Cost << "\n");
+
+        Cost += C;
+      }
     }
     return std::make_pair(TE, false);
   };
diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/external-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/external-shuffle.ll
index ce9e47a03dee3..f3e89b60b8045 100644
--- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/external-shuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/external-shuffle.ll
@@ -10,124 +10,84 @@ define void @phi_4(ptr addrspace(3) %inptr0, ptr addrspace(3) %inptr1, ptr %out,
 ; GCN-NEXT:  [[ENTRY:.*]]:
 ; GCN-NEXT:    [[TMP0:%.*]] = load <2 x i16>, ptr addrspace(3) [[INPTR0]], align 8
 ; GCN-NEXT:    [[GEP2:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 2
-; GCN-NEXT:    [[GEP3:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 3
 ; GCN-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP2]], align 2
 ; GCN-NEXT:    [[GEP4:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 4
-; GCN-NEXT:    [[GEP5:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 5
 ; GCN-NEXT:    [[TMP2:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP4]], align 8
 ; GCN-NEXT:    [[GEP6:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 6
-; GCN-NEXT:    [[GEP7:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 7
 ; GCN-NEXT:    [[TMP3:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP6]], align 2
 ; GCN-NEXT:    [[GEP8:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 8
-; GCN-NEXT:    [[GEP9:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 9
 ; GCN-NEXT:    [[TMP4:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP8]], align 8
 ; GCN-NEXT:    [[GEP10:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 10
-; GCN-NEXT:    [[GEP11:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 11
 ; GCN-NEXT:    [[TMP5:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP10]], align 2
 ; GCN-NEXT:    [[GEP12:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 12
-; GCN-NEXT:    [[GEP13:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 13
 ; GCN-NEXT:    [[TMP6:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP12]], align 8
 ; GCN-NEXT:    [[GEP14:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 14
 ; GCN-NEXT:    [[TMP7:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP14]], align 2
-; GCN-NEXT:    [[TMP8:%.*]] = extractelement <2 x i16> [[TMP1]], i32 0
-; GCN-NEXT:    [[TMP9:%.*]] = extractelement <2 x i16> [[TMP1]], i32 1
-; GCN-NEXT:    [[TMP10:%.*]] = extractelement <2 x i16> [[TMP2]], i32 0
-; GCN-NEXT:    [[TMP11:%.*]] = extractelement <2 x i16> [[TMP2]], i32 1
-; GCN-NEXT:    [[TMP12:%.*]] = extractelement <2 x i16> [[TMP3]], i32 0
-; GCN-NEXT:    [[TMP13:%.*]] = extractelement <2 x i16> [[TMP3]], i32 1
-; GCN-NEXT:    [[TMP14:%.*]] = extractelement <2 x i16> [[TMP4]], i32 0
-; GCN-NEXT:    [[TMP15:%.*]] = extractelement <2 x i16> [[TMP4]], i32 1
-; GCN-NEXT:    [[TMP24:%.*]] = extractelement <2 x i16> [[TMP5]], i32 0
-; GCN-NEXT:    [[TMP26:%.*]] = extractelement <2 x i16> [[TMP5]], i32 1
-; GCN-NEXT:    [[TMP28:%.*]] = extractelement <2 x i16> [[TMP6]], i32 0
-; GCN-NEXT:    [[TMP38:%.*]] = extractelement <2 x i16> [[TMP6]], i32 1
 ; GCN-NEXT:    br label %[[DO_BODY:.*]]
 ; GCN:       [[DO_BODY]]:
-; GCN-NEXT:    [[PHI2:%.*]] = phi i16 [ [[TMP8]], %[[ENTRY]] ], [ [[TMP30:%.*]], %[[DO_BODY]] ]
-; GCN-NEXT:    [[PHI3:%.*]] = phi i16 [ [[TMP9]], %[[ENTRY]] ], [ [[OTHERELE3:%.*]], %[[DO_BODY]] ]
-; GCN-NEXT:    [[PHI4:%.*]] = phi i16 [ [[TMP10]], %[[ENTRY]] ], [ [[TMP39:%.*]], %[[DO_BODY]] ]
-; GCN-NEXT:    [[PHI5:%.*]] = phi i16 [ [[TMP11]], %[[ENTRY]] ], [ [[OTHERELE5:%.*]], %[[DO_BODY]] ]
-; GCN-NEXT:    [[PHI6:%.*]] = phi i16 [ [[TMP12]], %[[ENTRY]] ], [ [[TMP32:%.*]], %[[DO_BODY]] ]
-; GCN-NEXT:    [[PHI7:%.*]] = phi i16 [ [[TMP13]], %[[ENTRY]] ], [ [[OTHERELE7:%.*]], %[[DO_BODY]] ]
-; GCN-NEXT:    [[PHI8:%.*]] = phi i16 [ [[TMP14]], %[[ENTRY]] ], [ [[TMP40:%.*]], %[[DO_BODY]] ]
-; GCN-NEXT:    [[PHI9:%.*]] = phi i16 [ [[TMP15]], %[[ENTRY]] ], [ [[OTHERELE9:%.*]], %[[DO_BODY]] ]
-; GCN-NEXT:    [[PHI10:%.*]] = phi i16 [ [[TMP24]], %[[ENTRY]] ], [ [[TMP34:%.*]], %[[DO_BODY]] ]
-; GCN-NEXT:    [[PHI11:%.*]] = phi i16 [ [[TMP26]], %[[ENTRY]] ], [ [[OTHERELE11:%.*]], %[[DO_BODY]] ]
-; GCN-NEXT:    [[PHI12:%.*]] = phi i16 [ [[TMP28]], %[[ENTRY]] ], [ [[TMP35:%.*]], %[[DO_BODY]] ]
-; GCN-NEXT:    [[PHI13:%.*]] = phi i16 [ [[TMP38]], %[[ENTRY]] ], [ [[OTHERELE13:%.*]], %[[DO_BODY]] ]
-; GCN-NEXT:    [[TMP41:%.*]] = phi <2 x i16> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP16:%.*]], %[[DO_BODY]] ]
+; GCN-NEXT:    [[TMP8:%.*]] = phi <2 x i16> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP16:%.*]], %[[DO_BODY]] ]
+; GCN-NEXT:    [[TMP9:%.*]] = phi <2 x i16> [ [[TMP1]], %[[ENTRY]] ], [ [[TMP17:%.*]], %[[DO_BODY]] ]
+; GCN-NEXT:    [[TMP10:%.*]] = phi <2 x i16> [ [[TMP2]], %[[ENTRY]] ], [ [[TMP18:%.*]], %[[DO_BODY]] ]
+; GCN-NEXT:    [[TMP11:%.*]] = phi <2 x i16> [ [[TMP3]], %[[ENTRY]] ], [ [[TMP19:%.*]], %[[DO_BODY]] ]
+; GCN-NEXT:    [[TMP12:%.*]] = phi <2 x i16> [ [[TMP4]], %[[ENTRY]] ], [ [[TMP20:%.*]], %[[DO_BODY]] ]
+; GCN-NEXT:    [[TMP13:%.*]] = phi <2 x i16> [ [[TMP5]], %[[ENTRY]] ], [ [[TMP21:%.*]], %[[DO_BODY]] ]
+; GCN-NEXT:    [[TMP14:%.*]] = phi <2 x i16> [ [[TMP6]], %[[ENTRY]] ], [ [[TMP22:%.*]], %[[DO_BODY]] ]
 ; GCN-NEXT:    [[TMP42:%.*]] = phi <2 x i16> [ [[TMP7]], %[[ENTRY]] ], [ [[TMP23:%.*]], %[[DO_BODY]] ]
 ; GCN-NEXT:    [[TMP16]] = load <2 x i16>, ptr addrspace(3) [[INPTR0]], align 8
-; GCN-NEXT:    [[OTHERELE3]] = load i16, ptr addrspace(3) [[GEP3]], align 1
-; GCN-NEXT:    [[TMP17:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP2]], align 2
-; GCN-NEXT:    [[OTHERELE5]] = load i16, ptr addrspace(3) [[GEP5]], align 1
-; GCN-NEXT:    [[TMP18:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP4]], align 8
-; GCN-NEXT:    [[OTHERELE7]] = load i16, ptr addrspace(3) [[GEP7]], align 1
-; GCN-NEXT:    [[TMP19:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP6]], align 2
-; GCN-NEXT:    [[OTHERELE9]] = load i16, ptr addrspace(3) [[GEP9]], align 1
-; GCN-NEXT:    [[TMP20:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP8]], align 8
-; GCN-NEXT:    [[OTHERELE11]] = load i16, ptr addrspace(3) [[GEP11]], align 1
-; GCN-NEXT:    [[TMP21:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP10]], align 2
-; GCN-NEXT:    [[OTHERELE13]] = load i16, ptr addrspace(3) [[GEP13]], align 1
-; GCN-NEXT:    [[TMP22:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP12]], align 8
+; GCN-NEXT:    [[TMP17]] = load <2 x i16>, ptr addrspace(3) [[GEP2]], align 2
+; GCN-NEXT:    [[TMP18]] = load <2 x i16>, ptr addrspace(3) [[GEP4]], align 8
+; GCN-NEXT:    [[TMP19]] = load <2 x i16>, ptr addrspace(3) [[GEP6]], align 2
+; GCN-NEXT:    [[TMP20]] = load <2 x i16>, ptr addrspace(3) [[GEP8]], align 8
+; GCN-NEXT:    [[TMP21]] = load <2 x i16>, ptr addrspace(3) [[GEP10]], align 2
+; GCN-NEXT:    [[TMP22]] = load <2 x i16>, ptr addrspace(3) [[GEP12]], align 8
 ; GCN-NEXT:    [[TMP23]] = load <2 x i16>, ptr addrspace(3) [[GEP14]], align 2
 ; GCN-NEXT:    [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0
-; GCN-NEXT:    [[TMP30]] = extractelement <2 x i16> [[TMP17]], i32 0
-; GCN-NEXT:    [[TMP39]] = extractelement <2 x i16> [[TMP18]], i32 0
-; GCN-NEXT:    [[TMP32]] = extractelement <2 x i16> [[TMP19]], i32 0
-; GCN-NEXT:    [[TMP40]] = extractelement <2 x i16> [[TMP20]], i32 0
-; GCN-NEXT:    [[TMP34]] = extractelement <2 x i16> [[TMP21]], i32 0
-; GCN-NEXT:    [[TMP35]] = extractelement <2 x i16> [[TMP22]], i32 0
 ; GCN-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]]
 ; GCN:       [[EXIT]]:
-; GCN-NEXT:    [[TMP36:%.*]] = shufflevector <2 x i16> [[TMP16]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; GCN-NEXT:    [[TMP37:%.*]] = shufflevector <2 x i16> [[TMP17]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; GCN-NEXT:    [[VEC038:%.*]] = shufflevector <16 x i16> [[TMP36]], <16 x i16> [[TMP37]], <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; GCN-NEXT:    [[TMP24:%.*]] = shufflevector <2 x i16> [[TMP16]], <2 x i16> [[TMP17]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; GCN-NEXT:    [[TMP25:%.*]] = shufflevector <2 x i16> [[TMP18]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; GCN-NEXT:    [[VEC059:%.*]] = shufflevector <16 x i16> [[VEC038]], <16 x i16> [[TMP25]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; GCN-NEXT:    [[TMP26:%.*]] = shufflevector <16 x i16> [[TMP24]], <16 x i16> [[TMP25]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; GCN-NEXT:    [[TMP27:%.*]] = shufflevector <2 x i16> [[TMP19]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; GCN-NEXT:    [[VEC0710:%.*]] = shufflevector <16 x i16> [[VEC059]], <16 x i16> [[TMP27]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; GCN-NEXT:    [[TMP28:%.*]] = shufflevector <16 x i16> [[TMP26]], <16 x i16> [[TMP27]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; GCN-NEXT:    [[TMP29:%.*]] = shufflevector <2 x i16> [[TMP20]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; GCN-NEXT:    [[VEC0911:%.*]] = shufflevector <16 x i16> [[VEC0710]], <16 x i16> [[TMP29]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; GCN-NEXT:    [[TMP30:%.*]] = shufflevector <16 x i16> [[TMP28]], <16 x i16> [[TMP29]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; GCN-NEXT:    [[TMP31:%.*]] = shufflevector <2 x i16> [[TMP21]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; GCN-NEXT:    [[VEC01112:%.*]] = shufflevector <16 x i16> [[VEC0911]], <16 x i16> [[TMP31]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
+; GCN-NEXT:    [[TMP32:%.*]] = shufflevector <16 x i16> [[TMP30]], <16 x i16> [[TMP31]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 poison, i32 poison, i32 poison, i32 poison>
 ; GCN-NEXT:    [[TMP33:%.*]] = shufflevector <2 x i16> [[TMP22]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; GCN-NEXT:    [[TMP58:%.*]] = shufflevector <16 x i16> [[VEC01112]], <16 x i16> [[TMP33]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
-; GCN-NEXT:    [[TMP60:%.*]] = shufflevector <2 x i16> [[TMP23]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GCN-NEXT:    [[TMP47:%.*]] = shufflevector <16 x i16> [[TMP32]], <16 x i16> [[TMP33]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 poison, i32 poison>
+; GCN-NEXT:    [[TMP48:%.*]] = shufflevector <2 x i16> [[TMP23]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GCN-NEXT:    [[TMP49:%.*]] = shufflevector <16 x i16> [[TMP47]], <16 x i16> [[TMP48]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
+; GCN-NEXT:    [[TMP37:%.*]] = shufflevector <2 x i16> [[TMP0]], <2 x i16> [[TMP1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GCN-NEXT:    [[TMP38:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GCN-NEXT:    [[TMP39:%.*]] = shufflevector <16 x i16> [[TMP37]], <16 x i16> [[TMP38]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GCN-NEXT:    [[TMP40:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GCN-NEXT:    [[TMP41:%.*]] = shufflevector <16 x i16> [[TMP39]], <16 x i16> [[TMP40]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GCN-NEXT:    [[TMP57:%.*]] = shufflevector <2 x i16> [[TMP4]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GCN-NEXT:    [[TMP43:%.*]] = shufflevector <16 x i16> [[TMP41]], <16 x i16> [[TMP57]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GCN-NEXT:    [[TMP44:%.*]] = shufflevector <2 x i16> [[TMP5]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GCN-NEXT:    [[TMP45:%.*]] = shufflevector <16 x i16> [[TMP43]], <16 x i16> [[TMP44]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 poison, i32 poison, i32 poison, i32 poison>
+; GCN-NEXT:    [[TMP46:%.*]] = shufflevector <2 x i16> [[TMP6]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GCN-NEXT:    [[TMP58:%.*]] = shufflevector <16 x i16> [[TMP45]], <16 x i16> [[TMP46]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 poison, i32 poison>
+; GCN-NEXT:    [[TMP60:%.*]] = shufflevector <2 x i16> [[TMP7]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; GCN-NEXT:    [[VEC2157:%.*]] = shufflevector <16 x i16> [[TMP58]], <16 x i16> [[TMP60]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
-; GCN-NEXT:    [[TMP50:%.*]] = shufflevector <2 x i16> [[TMP0]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; GCN-NEXT:    [[TMP51:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GCN-NEXT:    [[TMP50:%.*]] = shufflevector <2 x i16> [[TMP8]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GCN-NEXT:    [[TMP51:%.*]] = shufflevector <2 x i16> [[TMP9]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; GCN-NEXT:    [[VEC231:%.*]] = shufflevector <16 x i16> [[TMP50]], <16 x i16> [[TMP51]], <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; GCN-NEXT:    [[TMP52:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GCN-NEXT:    [[TMP52:%.*]] = shufflevector <2 x i16> [[TMP10]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; GCN-NEXT:    [[VEC252:%.*]] = shufflevector <16 x i16> [[VEC231]], <16 x i16> [[TMP52]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; GCN-NEXT:    [[TMP53:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GCN-NEXT:    [[TMP53:%.*]] = shufflevector <2 x i16> [[TMP11]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; GCN-NEXT:    [[VEC273:%.*]] = shufflevector <16 x i16> [[VEC252]], <16 x i16> [[TMP53]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; GCN-NEXT:    [[TMP54:%.*]] = shufflevector <2 x i16> [[TMP4]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GCN-NEXT:    [[TMP54:%.*]] = shufflevector <2 x i16> [[TMP12]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; GCN-NEXT:    [[VEC294:%.*]] = shufflevector <16 x i16> [[VEC273]], <16 x i16> [[TMP54]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; GCN-NEXT:    [[TMP55:%.*]] = shufflevector <2 x i16> [[TMP5]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GCN-NEXT:    [[TMP55:%.*]] = shufflevector <2 x i16> [[TMP13]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; GCN-NEXT:    [[VEC2115:%.*]] = shufflevector <16 x i16> [[VEC294]], <16 x i16> [[TMP55]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
-; GCN-NEXT:    [[TMP56:%.*]] = shufflevector <2 x i16> [[TMP6]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GCN-NEXT:    [[TMP56:%.*]] = shufflevector <2 x i16> [[TMP14]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; GCN-NEXT:    [[VEC2136:%.*]] = shufflevector <16 x i16> [[VEC2115]], <16 x i16> [[TMP56]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
-; GCN-NEXT:    [[TMP59:%.*]] = shufflevector <2 x i16> [[TMP7]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GCN-NEXT:    [[TMP59:%.*]] = shufflevector <2 x i16> [[TMP42]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; GCN-NEXT:    [[VEC2151:%.*]] = shufflevector <16 x i16> [[VEC2136]], <16 x i16> [[TMP59]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
-; GCN-NEXT:    [[TMP57:%.*]] = shufflevector <2 x i16> [[TMP41]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; GCN-NEXT:    [[VEC22:%.*]] = insertelement <16 x i16> [[TMP57]], i16 [[PHI2]], i64 2
-; GCN-NEXT:    [[VEC23:%.*]] = insertelement <16 x i16> [[VEC22]], i16 [[PHI3]], i64 3
-; GCN-NEXT:    [[VEC24:%.*]] = insertelement <16 x i16> [[VEC23]], i16 [[PHI4]], i64 4
-; GCN-NEXT:    [[VEC25:%.*]] = insertelement <16 x i16> [[VEC24]], i16 [[PHI5]], i64 5
-; GCN-NEXT:    [[VEC26:%.*]] = insertelement <16 x i16> [[VEC25]], i16 [[PHI6]], i64 6
-; GCN-NEXT:    [[VEC27:%.*]] = insertelement <16 x i16> [[VEC26]], i16 [[PHI7]], i64 7
-; GCN-NEXT:    [[VEC28:%.*]] = insertelement <16 x i16> [[VEC27]], i16 [[PHI8]], i64 8
-; GCN-NEXT:    [[VEC29:%.*]] = insertelement <16 x i16> [[VEC28]], i16 [[PHI9]], i64 9
-; GCN-NEXT:    [[VEC210:%.*]] = insertelement <16 x i16> [[VEC29]], i16 [[PHI10]], i64 10
-; GCN-NEXT:    [[VEC211:%.*]] = insertelement <16 x i16> [[VEC210]], i16 [[PHI11]], i64 11
-; GCN-NEXT:    [[VEC212:%.*]] = insertelement <16 x i16> [[VEC211]], i16 [[PHI12]], i64 12
-; GCN-NEXT:    [[VEC213:%.*]] = insertelement <16 x i16> [[VEC212]], i16 [[PHI13]], i64 13
-; GCN-NEXT:    [[TMP61:%.*]] = shufflevector <2 x i16> [[TMP42]], <2 x i16> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; GCN-NEXT:    [[VEC2152:%.*]] = shufflevector <16 x i16> [[VEC213]], <16 x i16> [[TMP61]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
-; GCN-NEXT:    store <16 x i16> [[VEC2151]], ptr [[OUT]], align 32
-; GCN-NEXT:    store <16 x i16> [[VEC2157]], ptr [[OUT1]], align 32
-; GCN-NEXT:    store <16 x i16> [[VEC2152]], ptr [[OUT2]], align 32
+; GCN-NEXT:    store <16 x i16> [[VEC2157]], ptr [[OUT]], align 32
+; GCN-NEXT:    store <16 x i16> [[TMP49]], ptr [[OUT1]], align 32
+; GCN-NEXT:    store <16 x i16> [[VEC2151]], ptr [[OUT2]], align 32
 ; GCN-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll
index f8522bc546e6b..e4daba253b439 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll
@@ -45,12 +45,13 @@ declare float @llvm.fmuladd.f32(float, float, float)
 define void @test(float %a) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x float> poison, float [[A:%.*]], i32 0
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd <2 x float> zeroinitializer, [[SHUFFLE]]
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[ADD_I157:%.*]] = fadd float 0.000000e+00, [[A:%.*]]
+; CHECK-NEXT:    [[ADD23_I:%.*]] = fadd float 0.000000e+00, [[A]]
+; CHECK-NEXT:    [[INSERT:%.*]] = insertelement <2 x float> zeroinitializer, float [[ADD_I157]], i64 0
+; CHECK-NEXT:    [[INSERT_I:%.*]] = insertelement <2 x float> [[INSERT]], float [[ADD23_I]], i64 1
+; CHECK-NEXT:    [[AGG:%.*]] = insertelement <2 x float> [[INSERT_I]], float [[ADD_I157]], i64 1
 ; CHECK-NEXT:    br label [[LOOP]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll
index 28bab3276c47d..6942df532ae29 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll
@@ -7,9 +7,8 @@ define void @foo(double %i) {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x double> <double 0.000000e+00, double 0.000000e+00, double poison, double 0.000000e+00>, double [[I]], i32 2
 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub <4 x double> zeroinitializer, [[TMP0]]
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[I]], i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = fsub <2 x double> zeroinitializer, [[TMP3]]
 ; CHECK-NEXT:    [[I82:%.*]] = fsub double 0.000000e+00, poison
+; CHECK-NEXT:    [[I103:%.*]] = fsub double 0.000000e+00, [[I]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <8 x i32> <i32 poison, i32 0, i32 poison, i32 1, i32 poison, i32 0, i32 poison, i32 1>
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <8 x double> [[TMP8]], <8 x double> <double 0.000000e+00, double poison, double poison, double poison, double 0.000000e+00, double poison, double poison, double poison>, <8 x i32> <i32 8, i32 1, i32 poison, i32 3, i32 12, i32 5, i32 poison, i32 7>
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x double> [[TMP5]], double [[I82]], i32 2
@@ -22,13 +21,11 @@ define void @foo(double %i) {
 ; CHECK-NEXT:    [[TMP17:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP16]])
 ; CHECK-NEXT:    br i1 [[TMP17]], label [[BB58:%.*]], label [[BB115:%.*]]
 ; CHECK:       bb115:
-; CHECK-NEXT:    [[TMP18:%.*]] = fmul <2 x double> zeroinitializer, [[TMP4]]
-; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <2 x double> [[TMP18]], i32 0
-; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x double> [[TMP18]], i32 1
+; CHECK-NEXT:    [[TMP19:%.*]] = fmul double 0.000000e+00, [[I103]]
+; CHECK-NEXT:    [[TMP20:%.*]] = fmul double 0.000000e+00, [[I82]]
 ; CHECK-NEXT:    [[I118:%.*]] = fadd double [[TMP19]], [[TMP20]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = fmul <4 x double> zeroinitializer, [[TMP1]]
-; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double poison>, <4 x double> [[TMP22]], <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double poison>, double [[I82]], i32 3
 ; CHECK-NEXT:    [[TMP24:%.*]] = fadd <4 x double> [[TMP21]], [[TMP23]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = fadd <4 x double> [[TMP24]], zeroinitializer
 ; CHECK-NEXT:    [[TMP26:%.*]] = select <4 x i1> zeroinitializer, <4 x double> zeroinitializer, <4 x double> [[TMP25]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insertelements-with-reused-indices.ll b/llvm/test/Transforms/SLPVectorizer/X86/insertelements-with-reused-indices.ll
index a8160d77619c5..3d45ebdc38968 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insertelements-with-reused-indices.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insertelements-with-reused-indices.ll
@@ -5,11 +5,11 @@ define void @test() {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x float> undef, float 0.000000e+00, i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float 0.000000e+00, i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 0.000000e+00, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 0.000000e+00, i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = fsub float 0.000000e+00, 0.000000e+00
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x float> zeroinitializer, float [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP0]], float 0.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> [[TMP2]], float 0.000000e+00, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP5]], float 0.000000e+00, i32 0
+; CHECK-NEXT:    [[TMP8:%.*]] = fsub float 0.000000e+00, 0.000000e+00
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x float> zeroinitializer, float [[TMP8]], i32 0
 ; CHECK-NEXT:    ret void
 ;
 entry:

>From c9241b02804133e31c4e83630646840bb2a18263 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 28 Apr 2025 16:25:57 -0700
Subject: [PATCH 2/3] Use correct mask

Change-Id: Ie1209bb7f6c49992d41e3fec8a195a81d04f34d4
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    |  2 +-
 .../SLPVectorizer/X86/buildvector-shuffle.ll  |  9 +++----
 .../X86/vec_list_bias-inseltpoison.ll         | 25 ++++++++++---------
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index fd58da64901f3..4f4e708bd90ad 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -14916,7 +14916,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
           if (Mask[I] != PoisonMaskElem)
             ResizeMask[Mask[I]] = Mask[I];
         }
-        if (!ShuffleVectorInst::isIdentityMask(Mask, VF))
+        if (!ShuffleVectorInst::isIdentityMask(ResizeMask, VF))
           C = ::getShuffleCost(
               *TTI, TTI::SK_PermuteSingleSrc,
               getWidenedType(TE->getMainOp()->getType(), VecVF), ResizeMask);
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll
index e4daba253b439..40d7dde56e90b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll
@@ -45,13 +45,12 @@ declare float @llvm.fmuladd.f32(float, float, float)
 define void @test(float %a) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x float> poison, float [[A:%.*]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[ADD_I157:%.*]] = fadd float 0.000000e+00, [[A:%.*]]
-; CHECK-NEXT:    [[ADD23_I:%.*]] = fadd float 0.000000e+00, [[A]]
-; CHECK-NEXT:    [[INSERT:%.*]] = insertelement <2 x float> zeroinitializer, float [[ADD_I157]], i64 0
-; CHECK-NEXT:    [[INSERT_I:%.*]] = insertelement <2 x float> [[INSERT]], float [[ADD23_I]], i64 1
-; CHECK-NEXT:    [[AGG:%.*]] = insertelement <2 x float> [[INSERT_I]], float [[ADD_I157]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x float> zeroinitializer, [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[LOOP]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll
index e3a6020a542fb..2cc2f28ccf6d5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll
@@ -25,7 +25,6 @@ define void @test(ptr nocapture %t2) {
 ; CHECK-NEXT:    [[T24:%.*]] = add nsw i32 [[T23]], [[T21]]
 ; CHECK-NEXT:    [[T25:%.*]] = sub nsw i32 [[T21]], [[T23]]
 ; CHECK-NEXT:    [[T27:%.*]] = sub nsw i32 [[T3]], [[T24]]
-; CHECK-NEXT:    [[T32:%.*]] = mul nsw i32 [[T27]], 6270
 ; CHECK-NEXT:    [[T37:%.*]] = add nsw i32 [[T25]], [[T11]]
 ; CHECK-NEXT:    [[T38:%.*]] = add nsw i32 [[T17]], [[T5]]
 ; CHECK-NEXT:    [[T39:%.*]] = add nsw i32 [[T37]], [[T38]]
@@ -34,7 +33,6 @@ define void @test(ptr nocapture %t2) {
 ; CHECK-NEXT:    [[T42:%.*]] = mul nsw i32 [[T17]], 16819
 ; CHECK-NEXT:    [[T47:%.*]] = mul nsw i32 [[T37]], -16069
 ; CHECK-NEXT:    [[T48:%.*]] = mul nsw i32 [[T38]], -3196
-; CHECK-NEXT:    [[T49:%.*]] = add nsw i32 [[T40]], [[T47]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[T8]], align 4
 ; CHECK-NEXT:    [[T15:%.*]] = load i32, ptr [[T14]], align 4
 ; CHECK-NEXT:    [[T9:%.*]] = load i32, ptr [[T8]], align 4
@@ -42,17 +40,20 @@ define void @test(ptr nocapture %t2) {
 ; CHECK-NEXT:    [[T30:%.*]] = add nsw i32 [[T27]], [[T29]]
 ; CHECK-NEXT:    [[T31:%.*]] = mul nsw i32 [[T30]], 4433
 ; CHECK-NEXT:    [[T34:%.*]] = mul nsw i32 [[T29]], -15137
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> <i32 1, i32 poison>
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[T40]], i32 1
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[T48]], i32 1
-; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i32> [[TMP3]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[T67:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T32]], i32 2
-; CHECK-NEXT:    [[T68:%.*]] = insertelement <8 x i32> [[T67]], i32 [[T49]], i32 3
-; CHECK-NEXT:    [[T701:%.*]] = shufflevector <8 x i32> [[T68]], <8 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[T40]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[T27]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[T47]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i32> <i32 poison, i32 poison, i32 6270, i32 poison>, <4 x i32> [[TMP6]], <4 x i32> <i32 4, i32 poison, i32 2, i32 poison>
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[T48]], i32 1
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[T40]], i32 3
+; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <4 x i32> [[TMP5]], [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = mul nsw <4 x i32> [[TMP5]], [[TMP9]]
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT:    [[T701:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 poison, i32 3>
 ; CHECK-NEXT:    [[T71:%.*]] = insertelement <8 x i32> [[T701]], i32 [[T34]], i32 6
-; CHECK-NEXT:    [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[T49]], i32 7
-; CHECK-NEXT:    [[T76:%.*]] = shl <8 x i32> [[T72]], splat (i32 3)
+; CHECK-NEXT:    [[T76:%.*]] = shl <8 x i32> [[T71]], splat (i32 3)
 ; CHECK-NEXT:    store <8 x i32> [[T76]], ptr [[T2]], align 4
 ; CHECK-NEXT:    ret void
 ;

>From 2fd89792810a9cfee1bcd7b3e679fec5cdb60226 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Wed, 11 Jun 2025 07:24:26 -0700
Subject: [PATCH 3/3] Revert unchanged tests

Change-Id: I9c4865641974152f6289df79df4aa057cdcdb8ed
---
 .../SLPVectorizer/X86/buildvector-shuffle.ll           |  6 +++---
 .../X86/insertelements-with-reused-indices.ll          | 10 +++++-----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll
index 40d7dde56e90b..f8522bc546e6b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll
@@ -46,11 +46,11 @@ define void @test(float %a) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x float> poison, float [[A:%.*]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x float> zeroinitializer, [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd <2 x float> zeroinitializer, [[SHUFFLE]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[LOOP]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insertelements-with-reused-indices.ll b/llvm/test/Transforms/SLPVectorizer/X86/insertelements-with-reused-indices.ll
index 3d45ebdc38968..a8160d77619c5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insertelements-with-reused-indices.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insertelements-with-reused-indices.ll
@@ -5,11 +5,11 @@ define void @test() {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x float> undef, float 0.000000e+00, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP0]], float 0.000000e+00, i32 1
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> [[TMP2]], float 0.000000e+00, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP5]], float 0.000000e+00, i32 0
-; CHECK-NEXT:    [[TMP8:%.*]] = fsub float 0.000000e+00, 0.000000e+00
-; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x float> zeroinitializer, float [[TMP8]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float 0.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 0.000000e+00, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 0.000000e+00, i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = fsub float 0.000000e+00, 0.000000e+00
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x float> zeroinitializer, float [[TMP4]], i32 0
 ; CHECK-NEXT:    ret void
 ;
 entry:



More information about the llvm-commits mailing list