[llvm] 092d628 - [SLP]Check for div/rem instructions before extending with poisons

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 13 09:34:19 PST 2025


Author: Alexey Bataev
Date: 2025-01-13T09:28:27-08:00
New Revision: 092d6283838dea79670750b9415955c5f0cb5178

URL: https://github.com/llvm/llvm-project/commit/092d6283838dea79670750b9415955c5f0cb5178
DIFF: https://github.com/llvm/llvm-project/commit/092d6283838dea79670750b9415955c5f0cb5178.diff

LOG: [SLP]Check for div/rem instructions before extending with poisons

We need to check whether the instructions can be safely extended with poison
values before actually doing so, to avoid incorrect transformations.

Fixes #122691

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/div-possibly-extended-with-poisons.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index df46c69ff3ab40..4b0ed5b30179b2 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -8091,6 +8091,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
             NonUniqueValueVL.append(
                 PWSz - UniqueValues.size(),
                 PoisonValue::get(UniqueValues.front()->getType()));
+            // Check that the operations, once extended with poisons, are still
+            // valid for vectorization (div/rem are not allowed).
+            if (!getSameOpcode(NonUniqueValueVL, *TLI).valid()) {
+              LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
+              newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
+              return false;
+            }
             VL = NonUniqueValueVL;
           }
           return true;
@@ -17818,7 +17825,7 @@ bool BoUpSLP::collectValuesToDemote(
   };
   if (E.isGather() || !Visited.insert(&E).second ||
       any_of(E.Scalars, [&](Value *V) {
-        return all_of(V->users(), [&](User *U) {
+        return !isa<PoisonValue>(V) && all_of(V->users(), [&](User *U) {
           return isa<InsertElementInst>(U) && !getTreeEntry(U);
         });
       }))

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/div-possibly-extended-with-poisons.ll b/llvm/test/Transforms/SLPVectorizer/X86/div-possibly-extended-with-poisons.ll
new file mode 100644
index 00000000000000..07ee8f840721fd
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/div-possibly-extended-with-poisons.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-100 < %s | FileCheck %s
+
+define i8 @test(ptr %g_127, i32 %0, i16 %1) { ; Regression test added with the fix for issue #122691.
+; CHECK-LABEL: define i8 @test(
+; CHECK-SAME: ptr [[G_127:%.*]], i32 [[TMP0:%.*]], i16 [[TMP1:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[FOR_INC434_I:.*]]
+; CHECK:       [[FOR_COND166_PREHEADER_I:.*]]:
+; CHECK-NEXT:    br label %[[FOR_INC434_I]]
+; CHECK:       [[FOR_INC434_I]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 60, %[[FOR_COND166_PREHEADER_I]] ]
+; CHECK-NEXT:    [[CONV8_I_I:%.*]] = zext nneg i32 [[TMP0]] to i64
+; CHECK-NEXT:    [[DIV_I_I_1:%.*]] = udiv i64 [[CONV8_I_I]], [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc i64 [[DIV_I_I_1]] to i16
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x i16> poison, i16 [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i64> poison, i64 [[CONV8_I_I]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i64> poison, i64 [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP9:%.*]] = udiv <4 x i64> [[TMP6]], [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = trunc <4 x i64> [[TMP9]] to <4 x i16>
+; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i16> [[TMP10]], <4 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 1, i32 2, i32 poison, i32 3, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP11]], <8 x i32> <i32 0, i32 8, i32 poison, i32 10, i32 11, i32 poison, i32 13, i32 poison>
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <8 x i16> [[TMP12]], <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 3, i32 4, i32 4, i32 6, i32 6>
+; CHECK-NEXT:    [[TMP14:%.*]] = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> [[TMP13]])
+; CHECK-NEXT:    [[TMP15:%.*]] = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> [[TMP14]])
+; CHECK-NEXT:    [[OP_RDX:%.*]] = and i16 [[TMP15]], [[TMP1]]
+; CHECK-NEXT:    [[AND14_I_2_I_5:%.*]] = zext i16 [[OP_RDX]] to i32
+; CHECK-NEXT:    store i32 [[AND14_I_2_I_5]], ptr [[G_127]], align 4
+; CHECK-NEXT:    ret i8 0
+;
+entry:
+  br label %for.inc434.i
+
+for.cond166.preheader.i: ; No branch in this function targets this block.
+  br label %for.inc434.i
+
+for.inc434.i:
+  %2 = phi i64 [ 0, %entry ], [ 60, %for.cond166.preheader.i ] ; Divisor of every udiv below; 0 on the edge from %entry.
+  %conv8.i.i = zext nneg i32 %0 to i64
+  %div.i.i.1 = udiv i64 %conv8.i.i, %2 ; First of five udivs with identical operands.
+  %3 = trunc i64 %div.i.i.1 to i16
+  %call12.i.2.i.1 = tail call i16 @llvm.bswap.i16(i16 %3)
+  %and14.i.2.i.118 = and i16 %1, %call12.i.2.i.1 ; Start of the and-chain that folds %1 with all eight bswap results.
+  %div.i.i.2 = udiv i64 %conv8.i.i, %2
+  %4 = trunc i64 %div.i.i.2 to i16
+  %call12.i.i.2 = tail call i16 @llvm.bswap.i16(i16 %4)
+  %and14.i.i.219 = and i16 %and14.i.2.i.118, %call12.i.i.2
+  %call12.i.2.i.2 = tail call i16 @llvm.bswap.i16(i16 %4) ; Second bswap of %4; %6 and %7 are likewise bswapped twice.
+  %and14.i.2.i.220 = and i16 %and14.i.i.219, %call12.i.2.i.2
+  %div.i.i.3 = udiv i64 %conv8.i.i, %2
+  %5 = trunc i64 %div.i.i.3 to i16
+  %call12.i.2.i.3 = tail call i16 @llvm.bswap.i16(i16 %5)
+  %and14.i.2.i.322 = and i16 %and14.i.2.i.220, %call12.i.2.i.3
+  %div.i.i.4 = udiv i64 %conv8.i.i, %2
+  %6 = trunc i64 %div.i.i.4 to i16
+  %call12.i.i.4 = tail call i16 @llvm.bswap.i16(i16 %6)
+  %and14.i.i.423 = and i16 %and14.i.2.i.322, %call12.i.i.4
+  %call12.i.2.i.4 = tail call i16 @llvm.bswap.i16(i16 %6)
+  %and14.i.2.i.424 = and i16 %and14.i.i.423, %call12.i.2.i.4
+  %div.i.i.5 = udiv i64 %conv8.i.i, %2
+  %7 = trunc i64 %div.i.i.5 to i16
+  %call12.i.i.5 = tail call i16 @llvm.bswap.i16(i16 %7)
+  %and14.i.i.525 = and i16 %and14.i.2.i.424, %call12.i.i.5
+  %call12.i.2.i.5 = tail call i16 @llvm.bswap.i16(i16 %7)
+  %and14.i.2.i.51 = and i16 %and14.i.i.525, %call12.i.2.i.5
+  %and14.i.2.i.5 = zext i16 %and14.i.2.i.51 to i32
+  store i32 %and14.i.2.i.5, ptr %g_127, align 4
+  ret i8 0
+}


        


More information about the llvm-commits mailing list