[llvm] f4fb854 - [SLP]Do not consider deleted instruction as external users.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 30 05:38:13 PDT 2021


Author: Alexey Bataev
Date: 2021-07-30T05:37:43-07:00
New Revision: f4fb85481136daedfd7f302543f620b0340ec51d

URL: https://github.com/llvm/llvm-project/commit/f4fb85481136daedfd7f302543f620b0340ec51d
DIFF: https://github.com/llvm/llvm-project/commit/f4fb85481136daedfd7f302543f620b0340ec51d.diff

LOG: [SLP]Do not consider deleted instruction as external users.

If the instruction was previously deleted, it should not be treated as
an external user. This fixes cost estimation and removes dead
extractelement instructions.

Differential Revision: https://reviews.llvm.org/D107106

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
    llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll
    llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
    llvm/test/Transforms/SLPVectorizer/X86/addsub.ll
    llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
    llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll
    llvm/test/Transforms/SLPVectorizer/X86/resched.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 06a735cad9aa3..ae9d5d6b4fa68 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2946,6 +2946,9 @@ void BoUpSLP::buildExternalUses(
         if (!UserInst)
           continue;
 
+        if (isDeleted(UserInst))
+          continue;
+
         // Skip in-tree scalars that become vectors
         if (TreeEntry *UseEntry = getTreeEntry(U)) {
           Value *UseScalar = UseEntry->Scalars[0];
@@ -6312,7 +6315,9 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
           LLVM_DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");
 
           // It is legal to delete users in the ignorelist.
-          assert((getTreeEntry(U) || is_contained(UserIgnoreList, U)) &&
+          assert((getTreeEntry(U) || is_contained(UserIgnoreList, U) ||
+                  (isa_and_nonnull<Instruction>(U) &&
+                   isDeleted(cast<Instruction>(U)))) &&
                  "Deleting out-of-tree value");
         }
       }

diff  --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
index 02acc84459490..e9c502b6982cd 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
@@ -28,25 +28,9 @@ define void @PR28330(i32 %n) {
 ; GATHER-NEXT:    br label [[FOR_BODY:%.*]]
 ; GATHER:       for.body:
 ; GATHER-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; GATHER-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
-; GATHER-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
-; GATHER-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
-; GATHER-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
-; GATHER-NEXT:    [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
-; GATHER-NEXT:    [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
-; GATHER-NEXT:    [[TMP8:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
-; GATHER-NEXT:    [[TMP9:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
-; GATHER-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
-; GATHER-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0
-; GATHER-NEXT:    [[TMP12:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1
-; GATHER-NEXT:    [[TMP13:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2
-; GATHER-NEXT:    [[TMP14:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3
-; GATHER-NEXT:    [[TMP15:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4
-; GATHER-NEXT:    [[TMP16:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5
-; GATHER-NEXT:    [[TMP17:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6
-; GATHER-NEXT:    [[TMP18:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP3]])
-; GATHER-NEXT:    [[OP_EXTRA]] = add i32 [[TMP18]], [[P17]]
-; GATHER-NEXT:    [[TMP19:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7
+; GATHER-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
+; GATHER-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
+; GATHER-NEXT:    [[OP_EXTRA]] = add i32 [[TMP3]], [[P17]]
 ; GATHER-NEXT:    br label [[FOR_BODY]]
 ;
 ; MAX-COST-LABEL: @PR28330(
@@ -148,25 +132,9 @@ define void @PR32038(i32 %n) {
 ; GATHER-NEXT:    br label [[FOR_BODY:%.*]]
 ; GATHER:       for.body:
 ; GATHER-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; GATHER-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
-; GATHER-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
-; GATHER-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
-; GATHER-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
-; GATHER-NEXT:    [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
-; GATHER-NEXT:    [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
-; GATHER-NEXT:    [[TMP8:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
-; GATHER-NEXT:    [[TMP9:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
-; GATHER-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
-; GATHER-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0
-; GATHER-NEXT:    [[TMP12:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1
-; GATHER-NEXT:    [[TMP13:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2
-; GATHER-NEXT:    [[TMP14:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3
-; GATHER-NEXT:    [[TMP15:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4
-; GATHER-NEXT:    [[TMP16:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5
-; GATHER-NEXT:    [[TMP17:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6
-; GATHER-NEXT:    [[TMP18:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP3]])
-; GATHER-NEXT:    [[OP_EXTRA]] = add i32 [[TMP18]], -5
-; GATHER-NEXT:    [[TMP19:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7
+; GATHER-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
+; GATHER-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
+; GATHER-NEXT:    [[OP_EXTRA]] = add i32 [[TMP3]], -5
 ; GATHER-NEXT:    br label [[FOR_BODY]]
 ;
 ; MAX-COST-LABEL: @PR32038(
@@ -184,17 +152,13 @@ define void @PR32038(i32 %n) {
 ; MAX-COST-NEXT:    br label [[FOR_BODY:%.*]]
 ; MAX-COST:       for.body:
 ; MAX-COST-NEXT:    [[P17:%.*]] = phi i32 [ [[P34:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; MAX-COST-NEXT:    [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
-; MAX-COST-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -720, i32 -720, i32 -720, i32 -720>, <4 x i32> <i32 -80, i32 -80, i32 -80, i32 -80>
-; MAX-COST-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
-; MAX-COST-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
-; MAX-COST-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
+; MAX-COST-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -720, i32 -720, i32 -720, i32 -720>, <4 x i32> <i32 -80, i32 -80, i32 -80, i32 -80>
 ; MAX-COST-NEXT:    [[P27:%.*]] = select i1 [[P9]], i32 -720, i32 -80
 ; MAX-COST-NEXT:    [[P29:%.*]] = select i1 [[P11]], i32 -720, i32 -80
-; MAX-COST-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3]])
-; MAX-COST-NEXT:    [[TMP8:%.*]] = add i32 [[TMP7]], [[P27]]
-; MAX-COST-NEXT:    [[TMP9:%.*]] = add i32 [[TMP8]], [[P29]]
-; MAX-COST-NEXT:    [[OP_EXTRA:%.*]] = add i32 [[TMP9]], -5
+; MAX-COST-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP2]])
+; MAX-COST-NEXT:    [[TMP4:%.*]] = add i32 [[TMP3]], [[P27]]
+; MAX-COST-NEXT:    [[TMP5:%.*]] = add i32 [[TMP4]], [[P29]]
+; MAX-COST-NEXT:    [[OP_EXTRA:%.*]] = add i32 [[TMP5]], -5
 ; MAX-COST-NEXT:    [[P31:%.*]] = select i1 [[P13]], i32 -720, i32 -80
 ; MAX-COST-NEXT:    [[P32:%.*]] = add i32 [[OP_EXTRA]], [[P31]]
 ; MAX-COST-NEXT:    [[P33:%.*]] = select i1 [[P15]], i32 -720, i32 -80

diff  --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll
index 66fc799bbbf67..d334197ad9c7b 100644
--- a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll
@@ -12,18 +12,17 @@ define void @foo() local_unnamed_addr {
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 0), align 4
 ; CHECK-NEXT:    [[ARRAYIDX372:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 0
 ; CHECK-NEXT:    [[ARRAYIDX372_1:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 2), align 4
 ; CHECK-NEXT:    [[ARRAYIDX372_2:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 2
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 3), align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[ADD277]], i32 1
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP1]], i32 2
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP2]], i32 3
-; CHECK-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> poison, [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = ashr <4 x i32> [[TMP7]], <i32 6, i32 6, i32 6, i32 6>
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 2) to <2 x i32>*), align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ADD277]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> poison, [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = ashr <4 x i32> [[TMP6]], <i32 6, i32 6, i32 6, i32 6>
 ; CHECK-NEXT:    [[ARRAYIDX372_3:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 3
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i32* [[ARRAYIDX372]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP8]], <4 x i32>* [[TMP9]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i32* [[ARRAYIDX372]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4
 ; CHECK-NEXT:    unreachable
 ;
 entry:

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
index edc27a635bd25..761f8c1cb4c6a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
@@ -45,8 +45,6 @@ define void @Test(i32) {
 ; CHECK-NEXT:    [[TMP8:%.*]] = and <2 x i32> [[TMP5]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = add <2 x i32> [[TMP5]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP10]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i32> [[TMP10]], i32 0
-; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x i32> [[TMP10]], i32 1
 ; CHECK-NEXT:    br label [[LOOP]]
 ;
 ; FORCE_REDUCTION-LABEL: @Test(

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll b/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll
index 2244cdb69e268..c9cb8951e8822 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll
@@ -357,10 +357,6 @@ define void @vec_shuff_reorder() #0 {
 ; CHECK-NEXT:    [[TMP15:%.*]] = fadd <4 x float> [[TMP10]], [[TMP14]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = fsub <4 x float> [[TMP10]], [[TMP14]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <4 x float> [[TMP15]], <4 x float> [[TMP16]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
-; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
-; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
-; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
 ; CHECK-NEXT:    store <4 x float> [[TMP17]], <4 x float>* bitcast ([4 x float]* @fc to <4 x float>*), align 4
 ; CHECK-NEXT:    ret void
 ;

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
index 9cef06325f414..4f0a8256255d6 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
@@ -845,10 +845,10 @@ define i32 @maxi8_mutiple_uses(i32) {
 ; THRESH-NEXT:    [[TMP14:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP4]], i32 1
 ; THRESH-NEXT:    [[TMP15:%.*]] = icmp sgt <2 x i32> [[TMP12]], [[TMP14]]
 ; THRESH-NEXT:    [[TMP16:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP12]], <2 x i32> [[TMP14]]
-; THRESH-NEXT:    [[TMP17:%.*]] = extractelement <2 x i32> [[TMP16]], i32 1
-; THRESH-NEXT:    [[TMP18:%.*]] = extractelement <2 x i32> [[TMP16]], i32 0
-; THRESH-NEXT:    [[OP_EXTRA:%.*]] = icmp sgt i32 [[TMP18]], [[TMP17]]
-; THRESH-NEXT:    [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP18]], i32 [[TMP17]]
+; THRESH-NEXT:    [[TMP17:%.*]] = extractelement <2 x i32> [[TMP16]], i32 0
+; THRESH-NEXT:    [[TMP18:%.*]] = extractelement <2 x i32> [[TMP16]], i32 1
+; THRESH-NEXT:    [[OP_EXTRA:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
+; THRESH-NEXT:    [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP17]], i32 [[TMP18]]
 ; THRESH-NEXT:    [[TMP19:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1
 ; THRESH-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i32 3, i32 4
 ; THRESH-NEXT:    store i32 [[TMP20]], i32* @var, align 8
@@ -1033,10 +1033,10 @@ define i32 @maxi8_wrong_parent(i32) {
 ; THRESH-NEXT:    [[TMP17:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i32 0
 ; THRESH-NEXT:    [[TMP18:%.*]] = insertelement <2 x i32> [[TMP17]], i32 [[TMP4]], i32 1
 ; THRESH-NEXT:    [[TMP19:%.*]] = select <2 x i1> [[TMP14]], <2 x i32> [[TMP16]], <2 x i32> [[TMP18]]
-; THRESH-NEXT:    [[TMP20:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1
-; THRESH-NEXT:    [[TMP21:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0
-; THRESH-NEXT:    [[OP_EXTRA:%.*]] = icmp sgt i32 [[TMP21]], [[TMP20]]
-; THRESH-NEXT:    [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP21]], i32 [[TMP20]]
+; THRESH-NEXT:    [[TMP20:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0
+; THRESH-NEXT:    [[TMP21:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1
+; THRESH-NEXT:    [[OP_EXTRA:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]]
+; THRESH-NEXT:    [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP20]], i32 [[TMP21]]
 ; THRESH-NEXT:    ret i32 [[OP_EXTRA1]]
 ;
   %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll b/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll
index 520421a9784b2..464288ef340c8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll
@@ -28,9 +28,9 @@ define void @f(i1 %x) #0 {
 ; CHECK-NEXT:    br i1 [[ICMP_D0]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
 ; CHECK-NEXT:    [[AND0_TMP:%.*]] = and i64 [[TMP8]], 8
-; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1
-; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[AND0_TMP]], i32 0
-; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[AND0_TMP]], i32 0
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP10]], i32 1
 ; CHECK-NEXT:    [[TMP12:%.*]] = and <2 x i64> [[TMP11]], [[TMP7]]
 ; CHECK-NEXT:    store <2 x i64> [[TMP12]], <2 x i64>* bitcast (%struct.a* @a to <2 x i64>*), align 8
 ; CHECK-NEXT:    br label [[IF_END]]

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll
index d4a03929b2afa..29460998d3b19 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll
@@ -15,57 +15,48 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv()
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_1_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 1
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_2_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 2
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_3_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 3
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[CONV31_I]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[CONV31_I]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[CONV31_I]], i32 2
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[CONV31_I]], i32 3
-; CHECK-NEXT:    [[TMP5:%.*]] = lshr <4 x i32> [[TMP4]], <i32 1, i32 2, i32 3, i32 4>
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_4_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 4
-; CHECK-NEXT:    [[SHR_4_I_I:%.*]] = lshr i32 [[CONV31_I]], 5
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_5_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 5
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_6_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 6
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[CONV31_I]], i32 0
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[CONV31_I]], i32 1
-; CHECK-NEXT:    [[TMP8:%.*]] = lshr <2 x i32> [[TMP7]], <i32 6, i32 7>
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_7_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 7
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[CONV31_I]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[CONV31_I]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[CONV31_I]], i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[CONV31_I]], i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[CONV31_I]], i32 4
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[CONV31_I]], i32 5
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[CONV31_I]], i32 6
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[CONV31_I]], i32 7
+; CHECK-NEXT:    [[TMP9:%.*]] = lshr <8 x i32> [[TMP8]], <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_8_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 8
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_9_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 9
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_10_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 10
-; CHECK-NEXT:    [[TMP9:%.*]] = lshr <4 x i32> [[TMP4]], <i32 8, i32 9, i32 10, i32 11>
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_11_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 11
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[CONV31_I]], i32 0
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[CONV31_I]], i32 1
+; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[CONV31_I]], i32 2
+; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[CONV31_I]], i32 3
+; CHECK-NEXT:    [[TMP14:%.*]] = lshr <4 x i32> [[TMP13]], <i32 9, i32 10, i32 11, i32 12>
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_12_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 12
+; CHECK-NEXT:    [[SHR_12_I_I:%.*]] = lshr i32 [[CONV31_I]], 13
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_13_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 13
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_14_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 14
-; CHECK-NEXT:    [[TMP10:%.*]] = lshr <4 x i32> [[TMP4]], <i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x i32> [[TMP10]], i32 3
-; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <16 x i32> poison, i32 [[SUB_I]], i32 0
-; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <16 x i32> [[TMP12]], <16 x i32> [[TMP13]], <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 19, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <16 x i32> [[TMP14]], i32 [[SHR_4_I_I]], i32 5
-; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <16 x i32> [[TMP15]], <16 x i32> [[TMP16]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <16 x i32> [[TMP17]], <16 x i32> [[TMP18]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <16 x i32> [[TMP19]], <16 x i32> [[TMP20]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
-; CHECK-NEXT:    [[TMP22:%.*]] = trunc <16 x i32> [[TMP21]] to <16 x i8>
-; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <4 x i32> [[TMP10]], i32 2
-; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i32> [[TMP10]], i32 1
-; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <4 x i32> [[TMP10]], i32 0
-; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <4 x i32> [[TMP9]], i32 3
-; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <4 x i32> [[TMP9]], i32 2
-; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <4 x i32> [[TMP9]], i32 1
-; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <4 x i32> [[TMP9]], i32 0
-; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <2 x i32> [[TMP8]], i32 1
-; CHECK-NEXT:    [[TMP31:%.*]] = extractelement <2 x i32> [[TMP8]], i32 0
-; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <4 x i32> [[TMP5]], i32 3
-; CHECK-NEXT:    [[TMP33:%.*]] = extractelement <4 x i32> [[TMP5]], i32 2
-; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <4 x i32> [[TMP5]], i32 1
-; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <4 x i32> [[TMP5]], i32 0
-; CHECK-NEXT:    [[TMP36:%.*]] = and <16 x i8> [[TMP22]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <2 x i32> poison, i32 [[CONV31_I]], i32 0
+; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[CONV31_I]], i32 1
+; CHECK-NEXT:    [[TMP17:%.*]] = lshr <2 x i32> [[TMP16]], <i32 14, i32 15>
+; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <16 x i32> poison, i32 [[SUB_I]], i32 0
+; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <16 x i32> [[TMP18]], <16 x i32> [[TMP19]], <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <16 x i32> [[TMP20]], <16 x i32> [[TMP21]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 16, i32 17, i32 18, i32 19, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <16 x i32> [[TMP22]], i32 [[SHR_12_I_I]], i32 13
+; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <2 x i32> [[TMP17]], <2 x i32> poison, <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <16 x i32> [[TMP23]], <16 x i32> [[TMP24]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
+; CHECK-NEXT:    [[TMP26:%.*]] = trunc <16 x i32> [[TMP25]] to <16 x i8>
+; CHECK-NEXT:    [[TMP27:%.*]] = and <16 x i8> [[TMP26]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_15_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 15
-; CHECK-NEXT:    [[TMP37:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
-; CHECK-NEXT:    store <16 x i8> [[TMP36]], <16 x i8>* [[TMP37]], align 1
+; CHECK-NEXT:    [[TMP28:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
+; CHECK-NEXT:    store <16 x i8> [[TMP27]], <16 x i8>* [[TMP28]], align 1
 ; CHECK-NEXT:    unreachable
 ; CHECK:       if.end50.i:
 ; CHECK-NEXT:    ret void


        


More information about the llvm-commits mailing list