[llvm] 641939b - [SLP]Remove CreateShuffle lambda and reuse ShuffleBuilder functions.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 14 10:20:03 PDT 2023


Author: Alexey Bataev
Date: 2023-03-14T10:15:41-07:00
New Revision: 641939baa92835a3f588b2f68e6a8cda96a328ef

URL: https://github.com/llvm/llvm-project/commit/641939baa92835a3f588b2f68e6a8cda96a328ef
DIFF: https://github.com/llvm/llvm-project/commit/641939baa92835a3f588b2f68e6a8cda96a328ef.diff

LOG: [SLP]Remove CreateShuffle lambda and reuse ShuffleBuilder functions.

After merging the main part of the gather/buildvector code, the CreateShuffle
lambda can be removed and the ShuffleBuilder add functions can be used instead.
Also, part of the code from CreateShuffle was migrated to the
BaseShuffleAnalysis::createShuffle function for better code emission.

Differential Revision: https://reviews.llvm.org/D145988

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll
    llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
    llvm/test/Transforms/SLPVectorizer/X86/buildvector-nodes-dependency.ll
    llvm/test/Transforms/SLPVectorizer/X86/entries-different-vf.ll
    llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll
    llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
    llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
    llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8ca8da430ded6..6b08c6a49f34d 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6629,7 +6629,8 @@ class BaseShuffleAnalysis {
         Op = SV->getOperand(1);
     }
     if (auto *OpTy = dyn_cast<FixedVectorType>(Op->getType());
-        !OpTy || !isIdentityMask(Mask, OpTy, SinglePermute)) {
+        !OpTy || !isIdentityMask(Mask, OpTy, SinglePermute) ||
+        ShuffleVectorInst::isZeroEltSplatMask(Mask)) {
       if (IdentityOp) {
         V = IdentityOp;
         assert(Mask.size() == IdentityMask.size() &&
@@ -6661,6 +6662,8 @@ class BaseShuffleAnalysis {
   static Value *createShuffle(Value *V1, Value *V2, ArrayRef<int> Mask,
                               ShuffleBuilderTy &Builder) {
     assert(V1 && "Expected at least one vector value.");
+    if (V2)
+      Builder.resizeToMatch(V1, V2);
     int VF = Mask.size();
     if (auto *FTy = dyn_cast<FixedVectorType>(V1->getType()))
       VF = FTy->getNumElements();
@@ -6748,6 +6751,15 @@ class BaseShuffleAnalysis {
           CombinedMask1[I] = CombinedMask2[I] + (Op1 == Op2 ? 0 : VF);
         }
       }
+      const int Limit = CombinedMask1.size() * 2;
+      if (Op1 == Op2 && Limit == 2 * VF &&
+          all_of(CombinedMask1, [=](int Idx) { return Idx < Limit; }) &&
+          (ShuffleVectorInst::isIdentityMask(CombinedMask1) ||
+           (ShuffleVectorInst::isZeroEltSplatMask(CombinedMask1) &&
+            isa<ShuffleVectorInst>(Op1) &&
+            cast<ShuffleVectorInst>(Op1)->getShuffleMask() ==
+                ArrayRef(CombinedMask1))))
+        return Op1;
       return Builder.createShuffleVector(
           Op1, Op1 == Op2 ? PoisonValue::get(Op1->getType()) : Op2,
           CombinedMask1);
@@ -9294,44 +9306,6 @@ Value *BoUpSLP::createBuildVector(const TreeEntry *E) {
     }
     return VecBase;
   };
-  auto CreateShuffle = [&](Value *V1, Value *V2, ArrayRef<int> Mask) {
-    unsigned VF1 = cast<FixedVectorType>(V1->getType())->getNumElements();
-    unsigned VF2 = cast<FixedVectorType>(V2->getType())->getNumElements();
-    unsigned VF = std::max(VF1, VF2);
-    if (VF1 != VF2) {
-      SmallVector<int> ExtMask(VF, UndefMaskElem);
-      std::iota(ExtMask.begin(), std::next(ExtMask.begin(), std::min(VF1, VF2)),
-                0);
-      if (VF1 < VF2) {
-        V1 = Builder.CreateShuffleVector(V1, ExtMask);
-        if (auto *I = dyn_cast<Instruction>(V1)) {
-          GatherShuffleExtractSeq.insert(I);
-          CSEBlocks.insert(I->getParent());
-        }
-      } else {
-        V2 = Builder.CreateShuffleVector(V2, ExtMask);
-        if (auto *I = dyn_cast<Instruction>(V2)) {
-          GatherShuffleExtractSeq.insert(I);
-          CSEBlocks.insert(I->getParent());
-        }
-      }
-    }
-    const int Limit = Mask.size() * 2;
-    if (V1 == V2 && Mask.size() == VF &&
-        all_of(Mask, [=](int Idx) { return Idx < Limit; }) &&
-        (ShuffleVectorInst::isIdentityMask(Mask) ||
-         (ShuffleVectorInst::isZeroEltSplatMask(Mask) &&
-          isa<ShuffleVectorInst>(V1) &&
-          cast<ShuffleVectorInst>(V1)->getShuffleMask() == Mask)))
-      return V1;
-    Value *Vec = V1 == V2 ? Builder.CreateShuffleVector(V1, Mask)
-                          : Builder.CreateShuffleVector(V1, V2, Mask);
-    if (auto *I = dyn_cast<Instruction>(Vec)) {
-      GatherShuffleExtractSeq.insert(I);
-      CSEBlocks.insert(I->getParent());
-    }
-    return Vec;
-  };
   auto NeedToDelay = [=](const TreeEntry *E,
                          ArrayRef<const TreeEntry *> Deps) -> Value * {
     // No need to delay emission if all deps are ready.
@@ -9438,29 +9412,20 @@ Value *BoUpSLP::createBuildVector(const TreeEntry *E) {
         }
       }
       if (Vec2)
-        Vec1 = CreateShuffle(Vec1, Vec2, ExtractMask);
+        ShuffleBuilder.add(Vec1, Vec2, ExtractMask);
       else if (Vec1)
-        Vec1 = CreateShuffle(Vec1, Vec1, ExtractMask);
+        ShuffleBuilder.add(Vec1, ExtractMask);
       else
-        Vec1 = PoisonValue::get(
-            FixedVectorType::get(ScalarTy, GatheredScalars.size()));
+        ShuffleBuilder.add(PoisonValue::get(FixedVectorType::get(
+                               ScalarTy, GatheredScalars.size())),
+                           ExtractMask);
     }
     if (GatherShuffle) {
-      Vec = CreateShuffle(Entries.front()->VectorizedValue,
-                          Entries.back()->VectorizedValue, Mask);
-      VF = Mask.size();
-      if (Vec1) {
-        // Build final mask.
-        for (auto [I, Idx] : enumerate(Mask)) {
-          if (ExtractMask[I] != UndefMaskElem)
-            Idx = I;
-          else if (Idx != UndefMaskElem)
-            Idx = I + VF;
-        }
-        Vec = CreateShuffle(Vec1, Vec, Mask);
-      }
-    } else {
-      Vec = Vec1;
+      if (Entries.size() == 1)
+        ShuffleBuilder.add(Entries.front()->VectorizedValue, Mask);
+      else
+        ShuffleBuilder.add(Entries.front()->VectorizedValue,
+                           Entries.back()->VectorizedValue, Mask);
     }
   } else if (!allConstant(E->Scalars)) {
     // TODO: remove this code once able to combine shuffled vectors and build
@@ -9556,12 +9521,13 @@ Value *BoUpSLP::createBuildVector(const TreeEntry *E) {
     }
     // Gather unique scalars and all constants.
     Vec = gather(GatheredScalars);
+    ShuffleBuilder.add(Vec, ReuseMask);
   } else {
     // Gather all constants.
     Vec = gather(E->Scalars);
+    ShuffleBuilder.add(Vec, ReuseMask);
   }
 
-  ShuffleBuilder.add(Vec, ReuseMask);
   Vec = ShuffleBuilder.finalize(E->ReuseShuffleIndices);
   if (NeedFreeze)
     Vec = Builder.CreateFreeze(Vec);

diff  --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll
index 2af7efb3d81fe..48ebff8ebabb6 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll
@@ -211,15 +211,14 @@ define void @noop_extracts_existing_vector_4_lanes(ptr %ptr.1, ptr %ptr.2) {
 ; CHECK-NEXT:    [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16
 ; CHECK-NEXT:    [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1
 ; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <2 x i32> <i32 2, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 0>
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul <4 x double> [[TMP0]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul <4 x double> [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    call void @use(double [[V1_LANE_0]])
 ; CHECK-NEXT:    call void @use(double [[V1_LANE_1]])
 ; CHECK-NEXT:    call void @use(double [[V1_LANE_2]])
 ; CHECK-NEXT:    call void @use(double [[V1_LANE_3]])
-; CHECK-NEXT:    store <9 x double> [[TMP4]], ptr [[PTR_1]], align 8
+; CHECK-NEXT:    store <9 x double> [[TMP3]], ptr [[PTR_1]], align 8
 ; CHECK-NEXT:    ret void
 ;
 bb:

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
index ae8a52e669d35..abf1d7abdc122 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
@@ -13,7 +13,7 @@ define void @Test(i32) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP3]])
 ; CHECK-NEXT:    [[OP_RDX:%.*]] = and i32 [[TMP0:%.*]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i32> <i32 poison, i32 14910>, i32 [[OP_RDX]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> <i32 1, i32 1>
 ; CHECK-NEXT:    [[TMP7:%.*]] = and <2 x i32> [[TMP5]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = add <2 x i32> [[TMP5]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP9]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> [[TMP8]], <2 x i32> <i32 0, i32 3>

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-nodes-dependency.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-nodes-dependency.ll
index 2827e3ba42af5..001da64c60a93 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-nodes-dependency.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-nodes-dependency.ll
@@ -12,7 +12,7 @@ define double @test() {
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 1>
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP3]], zeroinitializer
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP3]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    [[TMP7:%.*]] = fmul <2 x double> [[TMP6]], zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = fsub <2 x double> [[TMP7]], zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = fmul <2 x double> [[TMP7]], zeroinitializer

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/entries-different-vf.ll b/llvm/test/Transforms/SLPVectorizer/X86/entries-different-vf.ll
index efa37527ce991..d9942887d6c7d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/entries-different-vf.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/entries-different-vf.ll
@@ -10,14 +10,13 @@ define i1 @test() {
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i64> [[TMP1]], i64 0, i32 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i64> [[TMP2]], i64 0, i32 3
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> poison, <4 x i32> <i32 3, i32 undef, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i64> <i64 undef, i64 0, i64 undef, i64 undef>, <4 x i64> [[TMP4]], <4 x i32> <i32 4, i32 1, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 2, i32 2, i32 1, i32 3>
-; CHECK-NEXT:    [[TMP7:%.*]] = or <8 x i64> [[TMP3]], [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = sub <8 x i64> [[TMP3]], [[TMP6]]
-; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <8 x i64> [[TMP7]], <8 x i64> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 11, i32 12, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP10:%.*]] = icmp ult <8 x i64> [[TMP9]], zeroinitializer
-; CHECK-NEXT:    [[TMP11:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP10]])
-; CHECK-NEXT:    ret i1 [[TMP11]]
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i64> <i64 undef, i64 0, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <8 x i64> [[TMP3]], <8 x i32> <i32 11, i32 11, i32 11, i32 1, i32 9, i32 9, i32 1, i32 8>
+; CHECK-NEXT:    [[TMP6:%.*]] = or <8 x i64> [[TMP3]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = sub <8 x i64> [[TMP3]], [[TMP5]]
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <8 x i64> [[TMP6]], <8 x i64> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 11, i32 12, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp ult <8 x i64> [[TMP8]], zeroinitializer
+; CHECK-NEXT:    [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP9]])
+; CHECK-NEXT:    ret i1 [[TMP10]]
 ;
 entry:
   %0 = shl i64 0, 0

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll
index e1c816fd383d7..97189e4ef2a14 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll
@@ -10,11 +10,10 @@ define i32 @foo(i32 %a) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
 ; CHECK-NEXT:    br i1 false, label [[BB5:%.*]], label [[BB1:%.*]]
 ; CHECK:       bb1:
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = mul <2 x i32> [[TMP4]], <i32 3, i32 1>
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i32 0
-; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
-; CHECK-NEXT:    [[OP_RDX10:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[TMP4:%.*]] = mul <2 x i32> [[TMP1]], <i32 3, i32 1>
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i32> [[TMP4]], i32 1
+; CHECK-NEXT:    [[OP_RDX10:%.*]] = add i32 [[TMP5]], [[TMP6]]
 ; CHECK-NEXT:    [[OP_RDX11:%.*]] = add i32 [[OP_RDX10]], 0
 ; CHECK-NEXT:    br label [[BB3:%.*]]
 ; CHECK:       bb2:
@@ -23,10 +22,10 @@ define i32 @foo(i32 %a) {
 ; CHECK-NEXT:    [[P1:%.*]] = phi i32 [ [[OP_RDX11]], [[BB1]] ], [ 0, [[BB2:%.*]] ]
 ; CHECK-NEXT:    ret i32 0
 ; CHECK:       bb4:
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP9:%.*]] = add <4 x i32> [[TMP2]], [[TMP8]]
-; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP9]])
-; CHECK-NEXT:    [[OP_RDX8:%.*]] = add i32 [[TMP10]], 0
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = add <4 x i32> [[TMP2]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP8]])
+; CHECK-NEXT:    [[OP_RDX8:%.*]] = add i32 [[TMP9]], 0
 ; CHECK-NEXT:    [[OP_RDX9:%.*]] = add i32 [[OP_RDX8]], [[TMP3]]
 ; CHECK-NEXT:    ret i32 [[OP_RDX9]]
 ; CHECK:       bb5:

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
index 469b15d074186..adbeb638c1a46 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
@@ -643,12 +643,11 @@ define double @splat_loads(ptr %array1, ptr %array2, ptr %ptrA, ptr %ptrB) {
 ; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY2:%.*]], align 8
 ; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
 ; SSE-NEXT:    [[TMP3:%.*]] = fmul <2 x double> [[TMP0]], [[TMP2]]
-; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
-; SSE-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP0]], [[TMP4]]
-; SSE-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP5]]
-; SSE-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0
-; SSE-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 1
-; SSE-NEXT:    [[ADD3:%.*]] = fadd double [[TMP7]], [[TMP8]]
+; SSE-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP0]], [[TMP1]]
+; SSE-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[TMP3]], [[TMP4]]
+; SSE-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
+; SSE-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i32 1
+; SSE-NEXT:    [[ADD3:%.*]] = fadd double [[TMP6]], [[TMP7]]
 ; SSE-NEXT:    ret double [[ADD3]]
 ;
 ; AVX-LABEL: @splat_loads(
@@ -700,14 +699,13 @@ define double @splat_loads_with_internal_uses(ptr %array1, ptr %array2, ptr %ptr
 ; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY2:%.*]], align 8
 ; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
 ; SSE-NEXT:    [[TMP3:%.*]] = fmul <2 x double> [[TMP0]], [[TMP2]]
-; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
-; SSE-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP0]], [[TMP4]]
-; SSE-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP5]]
-; SSE-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> <i32 1, i32 1>
-; SSE-NEXT:    [[TMP8:%.*]] = fsub <2 x double> [[TMP6]], [[TMP7]]
-; SSE-NEXT:    [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0
-; SSE-NEXT:    [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1
-; SSE-NEXT:    [[RES:%.*]] = fadd double [[TMP9]], [[TMP10]]
+; SSE-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP0]], [[TMP1]]
+; SSE-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[TMP3]], [[TMP4]]
+; SSE-NEXT:    [[TMP6:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> zeroinitializer
+; SSE-NEXT:    [[TMP7:%.*]] = fsub <2 x double> [[TMP5]], [[TMP6]]
+; SSE-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0
+; SSE-NEXT:    [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1
+; SSE-NEXT:    [[RES:%.*]] = fadd double [[TMP8]], [[TMP9]]
 ; SSE-NEXT:    ret double [[RES]]
 ;
 ; AVX-LABEL: @splat_loads_with_internal_uses(

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
index 6e0aafeacd309..0be5e2d9d536b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
@@ -333,16 +333,15 @@ define i1 @logical_and_icmp_clamp_v8i32(<8 x i32> %x, <8 x i32> %y) {
 ; CHECK-NEXT:    [[Y1:%.*]] = extractelement <8 x i32> [[Y]], i32 1
 ; CHECK-NEXT:    [[Y2:%.*]] = extractelement <8 x i32> [[Y]], i32 2
 ; CHECK-NEXT:    [[Y3:%.*]] = extractelement <8 x i32> [[Y]], i32 3
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[X:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[X]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> <i32 42, i32 42, i32 42, i32 42, i32 poison, i32 poison, i32 poison, i32 poison>, i32 [[Y0]], i32 4
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[Y1]], i32 5
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[Y2]], i32 6
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[Y3]], i32 7
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp slt <8 x i32> [[TMP2]], [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = freeze <8 x i1> [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP8]])
-; CHECK-NEXT:    ret i1 [[TMP9]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[X:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> <i32 42, i32 42, i32 42, i32 42, i32 poison, i32 poison, i32 poison, i32 poison>, i32 [[Y0]], i32 4
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[Y1]], i32 5
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[Y2]], i32 6
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[Y3]], i32 7
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp slt <8 x i32> [[TMP1]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = freeze <8 x i1> [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP7]])
+; CHECK-NEXT:    ret i1 [[TMP8]]
 ;
   %x0 = extractelement <8 x i32> %x, i32 0
   %x1 = extractelement <8 x i32> %x, i32 1

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll b/llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll
index 3b4938f220fdd..e378909b49b92 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll
@@ -18,11 +18,10 @@ define void @test(ptr %a, ptr %b) {
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP3]], i32 2
 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <4 x float> [[TMP5]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP11:%.*]] = fmul <4 x float> [[TMP5]], zeroinitializer
-; CHECK-NEXT:    [[TMP12:%.*]] = fadd <4 x float> [[TMP9]], [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = fadd <4 x float> [[TMP12]], zeroinitializer
-; CHECK-NEXT:    store <4 x float> [[TMP13]], ptr [[RESULT]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = fmul <4 x float> [[TMP5]], zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = fadd <4 x float> [[TMP9]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = fadd <4 x float> [[TMP11]], zeroinitializer
+; CHECK-NEXT:    store <4 x float> [[TMP12]], ptr [[RESULT]], align 4
 ; CHECK-NEXT:    br label [[FOR_BODY]]
 ;
 entry:


        


More information about the llvm-commits mailing list