[llvm] ab9a81f - [SLP]Try to emit canonical shuffle with undef operand.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 4 08:17:45 PDT 2022


Author: Alexey Bataev
Date: 2022-10-04T08:16:07-07:00
New Revision: ab9a81f736acfb927b0e0b4f0de8710fc2379f70

URL: https://github.com/llvm/llvm-project/commit/ab9a81f736acfb927b0e0b4f0de8710fc2379f70
DIFF: https://github.com/llvm/llvm-project/commit/ab9a81f736acfb927b0e0b4f0de8710fc2379f70.diff

LOG: [SLP]Try to emit canonical shuffle with undef operand.

In the canonical form of the shuffle the poison/undef operand is the
second operand, the patch tries to emit canonical form for partial
vectorization of the buildvector sequence.
Also, this patch starts emitting freeze instruction for shuffles with undef indices if the second shuffle operan is undef, not poison. It is an initial step to D93818, where undef mask element are treated as returning poison value.

Differential Revision: https://reviews.llvm.org/D134377

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll
    llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll
    llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll
    llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index faec5611979d5..a9d1b89fcf822 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -8181,21 +8181,37 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       SmallVector<int> InsertMask(NumElts, UndefMaskElem);
       for (unsigned I = 0; I < NumElts; I++) {
         if (Mask[I] != UndefMaskElem)
-          InsertMask[Offset + I] = NumElts + I;
-      }
-      if (Offset != 0 ||
-          !isUndefVector(FirstInsert->getOperand(0), InsertMask)) {
-        for (unsigned I = 0; I < NumElts; I++) {
-          if (InsertMask[I] == UndefMaskElem)
-            InsertMask[I] = I;
-        }
-
-        V = Builder.CreateShuffleVector(
-            FirstInsert->getOperand(0), V, InsertMask,
-            cast<Instruction>(E->Scalars.back())->getName());
-        if (auto *I = dyn_cast<Instruction>(V)) {
-          GatherShuffleSeq.insert(I);
-          CSEBlocks.insert(I->getParent());
+          InsertMask[Offset + I] = I;
+      }
+      bool IsFirstUndef = isUndefVector(FirstInsert->getOperand(0), InsertMask);
+      if ((!IsIdentity || Offset != 0 || !IsFirstUndef) &&
+          NumElts != NumScalars) {
+        if (IsFirstUndef) {
+          if (!ShuffleVectorInst::isIdentityMask(InsertMask)) {
+            V = Builder.CreateShuffleVector(
+                V, InsertMask, cast<Instruction>(E->Scalars.back())->getName());
+            if (auto *I = dyn_cast<Instruction>(V)) {
+              GatherShuffleSeq.insert(I);
+              CSEBlocks.insert(I->getParent());
+            }
+            // Create freeze for undef values.
+            if (!isa<PoisonValue>(FirstInsert->getOperand(0)))
+              V = Builder.CreateFreeze(V);
+          }
+        } else {
+          for (unsigned I = 0; I < NumElts; I++) {
+            if (InsertMask[I] == UndefMaskElem)
+              InsertMask[I] = I;
+            else
+              InsertMask[I] += NumElts;
+          }
+          V = Builder.CreateShuffleVector(
+              FirstInsert->getOperand(0), V, InsertMask,
+              cast<Instruction>(E->Scalars.back())->getName());
+          if (auto *I = dyn_cast<Instruction>(V)) {
+            GatherShuffleSeq.insert(I);
+            CSEBlocks.insert(I->getParent());
+          }
         }
       }
 

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll
index 0fc1c7a363421..660aee2f8ce68 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll
@@ -11,7 +11,7 @@ define void @_Z10fooConvertPDv4_xS0_S0_PKS_() {
 ; CHECK-NEXT:    [[TMP4:%.*]] = fpext <2 x half> [[TMP3]] to <2 x float>
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> poison, <8 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; CHECK-NEXT:    [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    ret void
 ;
 entry:

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll
index c8db4960d51d3..bbeb19e6c6f24 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll
@@ -11,7 +11,8 @@ define void @_Z10fooConvertPDv4_xS0_S0_PKS_() {
 ; CHECK-NEXT:    [[TMP4:%.*]] = fpext <2 x half> [[TMP3]] to <2 x float>
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> undef, <8 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; CHECK-NEXT:    [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP7:%.*]] = freeze <8 x i32> [[VECINS_I_5_I1]]
 ; CHECK-NEXT:    ret void
 ;
 entry:

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll
index 8224ffd4d0bcf..2c635b4d6841b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll
@@ -305,7 +305,7 @@ define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x
 ; CHECK-NEXT:    [[TMP16:%.*]] = select <2 x i1> [[TMP11]], <2 x float> [[TMP13]], <2 x float> [[TMP15]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; CHECK-NEXT:    [[RD1:%.*]] = shufflevector <4 x float> poison, <4 x float> [[TMP18]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT:    [[RD1:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> poison, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
 ; CHECK-NEXT:    ret <4 x float> [[RD1]]
 ;
   %c0 = extractelement <4 x i32> %c, i32 0

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
index 5be933e0e4815..6302ef6945f52 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
@@ -339,8 +339,9 @@ define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x
 ; CHECK-NEXT:    [[TMP16:%.*]] = select <2 x i1> [[TMP11]], <2 x float> [[TMP13]], <2 x float> [[TMP15]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; CHECK-NEXT:    [[RD1:%.*]] = shufflevector <4 x float> undef, <4 x float> [[TMP18]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x float> [[RD1]]
+; CHECK-NEXT:    [[RD1:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> poison, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP19:%.*]] = freeze <4 x float> [[RD1]]
+; CHECK-NEXT:    ret <4 x float> [[TMP19]]
 ;
   %c0 = extractelement <4 x i32> %c, i32 0
   %c1 = extractelement <4 x i32> %c, i32 1


        


More information about the llvm-commits mailing list