[llvm] 036e48e - [SLP]Fix PR76850: do the analysis of the submask.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 8 07:53:34 PST 2024


Author: Alexey Bataev
Date: 2024-01-08T07:51:02-08:00
New Revision: 036e48e2f5f890e1f9574cdb610e2336f12038a2

URL: https://github.com/llvm/llvm-project/commit/036e48e2f5f890e1f9574cdb610e2336f12038a2
DIFF: https://github.com/llvm/llvm-project/commit/036e48e2f5f890e1f9574cdb610e2336f12038a2.diff

LOG: [SLP]Fix PR76850: do the analysis of the submask.

Need to limit the transformation of VecMask to the corresponding SliceSize-sized part of the mask, to avoid a compiler crash during further cost analysis.

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/splat-buildvector.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cbe767537a1d63..8e22b54f002d1c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10596,7 +10596,8 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
   inversePermutation(E->ReorderIndices, ReorderMask);
   if (!ReorderMask.empty())
     reorderScalars(GatheredScalars, ReorderMask);
-  auto FindReusedSplat = [&](MutableArrayRef<int> Mask, unsigned InputVF) {
+  auto FindReusedSplat = [&](MutableArrayRef<int> Mask, unsigned InputVF,
+                             unsigned I, unsigned SliceSize) {
     if (!isSplat(E->Scalars) || none_of(E->Scalars, [](Value *V) {
           return isa<UndefValue>(V) && !isa<PoisonValue>(V);
         }))
@@ -10619,11 +10620,13 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
          Idx == 0) ||
         (Mask.size() == InputVF &&
          ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))) {
-      std::iota(Mask.begin(), Mask.end(), 0);
+      std::iota(std::next(Mask.begin(), I * SliceSize),
+                std::next(Mask.begin(), (I + 1) * SliceSize), 0);
     } else {
-      unsigned I =
+      unsigned IVal =
           *find_if_not(Mask, [](int Idx) { return Idx == PoisonMaskElem; });
-      std::fill(Mask.begin(), Mask.end(), I);
+      std::fill(std::next(Mask.begin(), I * SliceSize),
+                std::next(Mask.begin(), (I + 1) * SliceSize), IVal);
     }
     return true;
   };
@@ -10872,7 +10875,8 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
       } else if (Vec1) {
         IsUsedInExpr &= FindReusedSplat(
             ExtractMask,
-            cast<FixedVectorType>(Vec1->getType())->getNumElements());
+            cast<FixedVectorType>(Vec1->getType())->getNumElements(), 0,
+            ExtractMask.size());
         ShuffleBuilder.add(Vec1, ExtractMask, /*ForExtracts=*/true);
         IsNonPoisoned &= isGuaranteedNotToBePoison(Vec1);
       } else {
@@ -10898,7 +10902,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
         copy(SubMask, std::next(VecMask.begin(), I * SliceSize));
         if (TEs.size() == 1) {
           IsUsedInExpr &=
-              FindReusedSplat(VecMask, TEs.front()->getVectorFactor());
+              FindReusedSplat(VecMask, TEs.front()->getVectorFactor(), I, SliceSize);
           ShuffleBuilder.add(*TEs.front(), VecMask);
           if (TEs.front()->VectorizedValue)
             IsNonPoisoned &=

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/splat-buildvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/splat-buildvector.ll
new file mode 100644
index 00000000000000..5e5981bdaaa804
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/splat-buildvector.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt --passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu %s -o - -slp-threshold=-100 | FileCheck %s
+declare i64 @llvm.smax.i64(i64, i64)
+
+define i8 @foo(i64 %val_i64_57) {
+; CHECK-LABEL: define i8 @foo(
+; CHECK-SAME: i64 [[VAL_I64_57:%.*]]) {
+; CHECK-NEXT:  entry_1:
+; CHECK-NEXT:    [[VAL_I64_58:%.*]] = call i64 @llvm.smax.i64(i64 0, i64 1)
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i64> <i64 0, i64 poison, i64 poison, i64 0>, i64 [[VAL_I64_57]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[VAL_I64_58]], i32 2
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> <i32 2, i32 2, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ule <4 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp sle <4 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT:    ret i8 0
+;
+entry_1:
+  %val_i64_58 = call i64 @llvm.smax.i64(i64 0, i64 1)
+  %val_i1_89 = icmp ule i64 %val_i64_57, %val_i64_58
+  %val_i1_95 = icmp sle i64 0, undef
+  %val_i1_98 = icmp uge i64 %val_i64_58, %val_i64_58
+  %val_i1_99 = icmp ule i64 0, %val_i64_58
+  ret i8 0
+}


        


More information about the llvm-commits mailing list