[llvm] b7574b0 - [SLP] Don't try to vectorize pair with insertelement

Anton Afanasyev via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 15 05:13:34 PST 2022


Author: Anton Afanasyev
Date: 2022-02-15T16:12:59+03:00
New Revision: b7574b092a97ef59631aa3e801b6c04665409147

URL: https://github.com/llvm/llvm-project/commit/b7574b092a97ef59631aa3e801b6c04665409147
DIFF: https://github.com/llvm/llvm-project/commit/b7574b092a97ef59631aa3e801b6c04665409147.diff

LOG: [SLP] Don't try to vectorize pair with insertelement

In particular, this breaks vectorization of insertelements where some of the
intermediate (i.e. not last) insertelements are used externally.

Fixes PR52275
Fixes #51617

Differential Revision: https://reviews.llvm.org/D119679

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/X86/pr52275.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 7e9d8058d1d91..d29440a8d1566 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4023,13 +4023,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       ValueSet SourceVectors;
       for (Value *V : VL) {
         SourceVectors.insert(cast<Instruction>(V)->getOperand(0));
-        if (getInsertIndex(V) == None) {
-          LLVM_DEBUG(dbgs() << "SLP: Gather of insertelement vectors with "
-                               "non-constant or undef index.\n");
-          newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
-          BS.cancelScheduling(VL, VL0);
-          return;
-        }
+        assert(getInsertIndex(V) != None && "Non-constant or undef index?");
       }
 
       if (count_if(VL, [&SourceVectors](Value *V) {
@@ -8617,6 +8611,8 @@ void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) {
 bool SLPVectorizerPass::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
   if (!A || !B)
     return false;
+  if (isa<InsertElementInst>(A) || isa<InsertElementInst>(B))
+    return false;
   Value *VL[] = {A, B};
   return tryToVectorizeList(VL, R);
 }

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/pr52275.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr52275.ll
index a8b8886cbe2c9..5f401a8d4b220 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr52275.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr52275.ll
@@ -4,21 +4,21 @@
 
 define  <4 x i8> @test(<4 x i8> %v, i8* %x) {
 ; CHECK-LABEL: @test(
-; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i64 1
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X]] to <2 x i8>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; CHECK-NEXT:    [[V11:%.*]] = shufflevector <4 x i8> [[V:%.*]], <4 x i8> [[TMP3]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
-; CHECK-NEXT:    [[V2:%.*]] = add <4 x i8> [[V11]], [[V11]]
+; CHECK-NEXT:    [[X0:%.*]] = load i8, i8* [[X:%.*]], align 4
+; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds i8, i8* [[X]], i64 1
+; CHECK-NEXT:    [[X1:%.*]] = load i8, i8* [[G1]], align 4
+; CHECK-NEXT:    [[V0:%.*]] = insertelement <4 x i8> [[V:%.*]], i8 [[X0]], i64 0
+; CHECK-NEXT:    [[V1:%.*]] = insertelement <4 x i8> [[V0]], i8 [[X1]], i64 1
+; CHECK-NEXT:    [[V2:%.*]] = add <4 x i8> [[V0]], [[V1]]
 ; CHECK-NEXT:    ret <4 x i8> [[V2]]
 ;
 ; FORCE_SLP-LABEL: @test(
-; FORCE_SLP-NEXT:    [[G1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i64 1
-; FORCE_SLP-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X]] to <2 x i8>*
-; FORCE_SLP-NEXT:    [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 4
-; FORCE_SLP-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; FORCE_SLP-NEXT:    [[V11:%.*]] = shufflevector <4 x i8> [[V:%.*]], <4 x i8> [[TMP3]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
-; FORCE_SLP-NEXT:    [[V2:%.*]] = add <4 x i8> [[V11]], [[V11]]
+; FORCE_SLP-NEXT:    [[X0:%.*]] = load i8, i8* [[X:%.*]], align 4
+; FORCE_SLP-NEXT:    [[G1:%.*]] = getelementptr inbounds i8, i8* [[X]], i64 1
+; FORCE_SLP-NEXT:    [[X1:%.*]] = load i8, i8* [[G1]], align 4
+; FORCE_SLP-NEXT:    [[V0:%.*]] = insertelement <4 x i8> [[V:%.*]], i8 [[X0]], i64 0
+; FORCE_SLP-NEXT:    [[V1:%.*]] = insertelement <4 x i8> [[V0]], i8 [[X1]], i64 1
+; FORCE_SLP-NEXT:    [[V2:%.*]] = add <4 x i8> [[V0]], [[V1]]
 ; FORCE_SLP-NEXT:    ret <4 x i8> [[V2]]
 ;
   %x0 = load i8, i8* %x, align 4
@@ -32,19 +32,25 @@ define  <4 x i8> @test(<4 x i8> %v, i8* %x) {
 
 define  <2 x i8> @test2(<2 x i8> %t6, i32* %t1) {
 ; CHECK-LABEL: @test2(
-; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1:%.*]], i64 1
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[T1]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8>
-; CHECK-NEXT:    [[T11:%.*]] = add <2 x i8> [[TMP3]], [[TMP3]]
+; CHECK-NEXT:    [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
+; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
+; CHECK-NEXT:    [[T5:%.*]] = load i32, i32* [[T4]], align 4
+; CHECK-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
+; CHECK-NEXT:    [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 0
+; CHECK-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
+; CHECK-NEXT:    [[T10:%.*]] = insertelement <2 x i8> [[T8]], i8 [[T9]], i64 1
+; CHECK-NEXT:    [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]]
 ; CHECK-NEXT:    ret <2 x i8> [[T11]]
 ;
 ; FORCE_SLP-LABEL: @test2(
-; FORCE_SLP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1:%.*]], i64 1
-; FORCE_SLP-NEXT:    [[TMP1:%.*]] = bitcast i32* [[T1]] to <2 x i32>*
-; FORCE_SLP-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4
-; FORCE_SLP-NEXT:    [[TMP3:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8>
-; FORCE_SLP-NEXT:    [[T11:%.*]] = add <2 x i8> [[TMP3]], [[TMP3]]
+; FORCE_SLP-NEXT:    [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
+; FORCE_SLP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
+; FORCE_SLP-NEXT:    [[T5:%.*]] = load i32, i32* [[T4]], align 4
+; FORCE_SLP-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
+; FORCE_SLP-NEXT:    [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 0
+; FORCE_SLP-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
+; FORCE_SLP-NEXT:    [[T10:%.*]] = insertelement <2 x i8> [[T8]], i8 [[T9]], i64 1
+; FORCE_SLP-NEXT:    [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]]
 ; FORCE_SLP-NEXT:    ret <2 x i8> [[T11]]
 ;
   %t3 = load i32, i32* %t1, align 4
@@ -60,21 +66,25 @@ define  <2 x i8> @test2(<2 x i8> %t6, i32* %t1) {
 
 define  <2 x i8> @test_reorder(<2 x i8> %t6, i32* %t1) {
 ; CHECK-LABEL: @test_reorder(
-; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1:%.*]], i64 1
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[T1]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i8> [[TMP3]], <2 x i8> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:    [[T11:%.*]] = add <2 x i8> [[TMP4]], [[TMP4]]
+; CHECK-NEXT:    [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
+; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
+; CHECK-NEXT:    [[T5:%.*]] = load i32, i32* [[T4]], align 4
+; CHECK-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
+; CHECK-NEXT:    [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 1
+; CHECK-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
+; CHECK-NEXT:    [[T10:%.*]] = insertelement <2 x i8> [[T8]], i8 [[T9]], i64 0
+; CHECK-NEXT:    [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]]
 ; CHECK-NEXT:    ret <2 x i8> [[T11]]
 ;
 ; FORCE_SLP-LABEL: @test_reorder(
-; FORCE_SLP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1:%.*]], i64 1
-; FORCE_SLP-NEXT:    [[TMP1:%.*]] = bitcast i32* [[T1]] to <2 x i32>*
-; FORCE_SLP-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4
-; FORCE_SLP-NEXT:    [[TMP3:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8>
-; FORCE_SLP-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i8> [[TMP3]], <2 x i8> poison, <2 x i32> <i32 1, i32 0>
-; FORCE_SLP-NEXT:    [[T11:%.*]] = add <2 x i8> [[TMP4]], [[TMP4]]
+; FORCE_SLP-NEXT:    [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
+; FORCE_SLP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
+; FORCE_SLP-NEXT:    [[T5:%.*]] = load i32, i32* [[T4]], align 4
+; FORCE_SLP-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
+; FORCE_SLP-NEXT:    [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 1
+; FORCE_SLP-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
+; FORCE_SLP-NEXT:    [[T10:%.*]] = insertelement <2 x i8> [[T8]], i8 [[T9]], i64 0
+; FORCE_SLP-NEXT:    [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]]
 ; FORCE_SLP-NEXT:    ret <2 x i8> [[T11]]
 ;
   %t3 = load i32, i32* %t1, align 4
@@ -90,23 +100,25 @@ define  <2 x i8> @test_reorder(<2 x i8> %t6, i32* %t1) {
 
 define  <4 x i8> @test_subvector(<4 x i8> %t6, i32* %t1) {
 ; CHECK-LABEL: @test_subvector(
-; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1:%.*]], i64 1
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[T1]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i8> [[TMP3]], <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; CHECK-NEXT:    [[T101:%.*]] = shufflevector <4 x i8> [[T6:%.*]], <4 x i8> [[TMP4]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
-; CHECK-NEXT:    [[T11:%.*]] = add <4 x i8> [[T101]], [[T101]]
+; CHECK-NEXT:    [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
+; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
+; CHECK-NEXT:    [[T5:%.*]] = load i32, i32* [[T4]], align 4
+; CHECK-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
+; CHECK-NEXT:    [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 0
+; CHECK-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
+; CHECK-NEXT:    [[T10:%.*]] = insertelement <4 x i8> [[T8]], i8 [[T9]], i64 1
+; CHECK-NEXT:    [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]]
 ; CHECK-NEXT:    ret <4 x i8> [[T11]]
 ;
 ; FORCE_SLP-LABEL: @test_subvector(
-; FORCE_SLP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1:%.*]], i64 1
-; FORCE_SLP-NEXT:    [[TMP1:%.*]] = bitcast i32* [[T1]] to <2 x i32>*
-; FORCE_SLP-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4
-; FORCE_SLP-NEXT:    [[TMP3:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8>
-; FORCE_SLP-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i8> [[TMP3]], <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; FORCE_SLP-NEXT:    [[T101:%.*]] = shufflevector <4 x i8> [[T6:%.*]], <4 x i8> [[TMP4]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
-; FORCE_SLP-NEXT:    [[T11:%.*]] = add <4 x i8> [[T101]], [[T101]]
+; FORCE_SLP-NEXT:    [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
+; FORCE_SLP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
+; FORCE_SLP-NEXT:    [[T5:%.*]] = load i32, i32* [[T4]], align 4
+; FORCE_SLP-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
+; FORCE_SLP-NEXT:    [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 0
+; FORCE_SLP-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
+; FORCE_SLP-NEXT:    [[T10:%.*]] = insertelement <4 x i8> [[T8]], i8 [[T9]], i64 1
+; FORCE_SLP-NEXT:    [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]]
 ; FORCE_SLP-NEXT:    ret <4 x i8> [[T11]]
 ;
   %t3 = load i32, i32* %t1, align 4
@@ -122,23 +134,25 @@ define  <4 x i8> @test_subvector(<4 x i8> %t6, i32* %t1) {
 
 define  <4 x i8> @test_subvector_reorder(<4 x i8> %t6, i32* %t1) {
 ; CHECK-LABEL: @test_subvector_reorder(
-; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1:%.*]], i64 1
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[T1]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i8> [[TMP3]], <2 x i8> poison, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
-; CHECK-NEXT:    [[T81:%.*]] = shufflevector <4 x i8> [[T6:%.*]], <4 x i8> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    [[T11:%.*]] = add <4 x i8> [[T81]], [[T81]]
+; CHECK-NEXT:    [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
+; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
+; CHECK-NEXT:    [[T5:%.*]] = load i32, i32* [[T4]], align 4
+; CHECK-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
+; CHECK-NEXT:    [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 3
+; CHECK-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
+; CHECK-NEXT:    [[T10:%.*]] = insertelement <4 x i8> [[T8]], i8 [[T9]], i64 2
+; CHECK-NEXT:    [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]]
 ; CHECK-NEXT:    ret <4 x i8> [[T11]]
 ;
 ; FORCE_SLP-LABEL: @test_subvector_reorder(
-; FORCE_SLP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1:%.*]], i64 1
-; FORCE_SLP-NEXT:    [[TMP1:%.*]] = bitcast i32* [[T1]] to <2 x i32>*
-; FORCE_SLP-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4
-; FORCE_SLP-NEXT:    [[TMP3:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8>
-; FORCE_SLP-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i8> [[TMP3]], <2 x i8> poison, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
-; FORCE_SLP-NEXT:    [[T81:%.*]] = shufflevector <4 x i8> [[T6:%.*]], <4 x i8> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; FORCE_SLP-NEXT:    [[T11:%.*]] = add <4 x i8> [[T81]], [[T81]]
+; FORCE_SLP-NEXT:    [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
+; FORCE_SLP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
+; FORCE_SLP-NEXT:    [[T5:%.*]] = load i32, i32* [[T4]], align 4
+; FORCE_SLP-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
+; FORCE_SLP-NEXT:    [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 3
+; FORCE_SLP-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
+; FORCE_SLP-NEXT:    [[T10:%.*]] = insertelement <4 x i8> [[T8]], i8 [[T9]], i64 2
+; FORCE_SLP-NEXT:    [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]]
 ; FORCE_SLP-NEXT:    ret <4 x i8> [[T11]]
 ;
   %t3 = load i32, i32* %t1, align 4


        


More information about the llvm-commits mailing list