[llvm] 874c49f - [SLP]Fix PR61395: need to adjust vector factor after emitting shuffle

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 14 06:30:46 PDT 2023


Author: Alexey Bataev
Date: 2023-03-14T06:27:08-07:00
New Revision: 874c49f55454cb285282e6d184f809945c0beca1

URL: https://github.com/llvm/llvm-project/commit/874c49f55454cb285282e6d184f809945c0beca1
DIFF: https://github.com/llvm/llvm-project/commit/874c49f55454cb285282e6d184f809945c0beca1.diff

LOG: [SLP]Fix PR61395: need to adjust vector factor after emitting shuffle
operation for combined entries.

The vector factor after combining the shuffle entries is defined by
the size of the mask, not by the vector factors of the original
entries. So, we need to adjust it to emit correct code.

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/entries-different-vf.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8256832b64e87..8ca8da430ded6 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9448,6 +9448,7 @@ Value *BoUpSLP::createBuildVector(const TreeEntry *E) {
     if (GatherShuffle) {
       Vec = CreateShuffle(Entries.front()->VectorizedValue,
                           Entries.back()->VectorizedValue, Mask);
+      VF = Mask.size();
       if (Vec1) {
         // Build final mask.
         for (auto [I, Idx] : enumerate(Mask)) {

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/entries-different-vf.ll b/llvm/test/Transforms/SLPVectorizer/X86/entries-different-vf.ll
new file mode 100644
index 0000000000000..efa37527ce991
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/entries-different-vf.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=icelake-server -S < %s | FileCheck %s
+
+define i1 @test() {
+; CHECK-LABEL: define i1 @test
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = shl i64 0, 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <8 x i64> <i64 poison, i64 poison, i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0>, i64 [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i64> [[TMP1]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i64> [[TMP2]], i64 0, i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> poison, <4 x i32> <i32 3, i32 undef, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i64> <i64 undef, i64 0, i64 undef, i64 undef>, <4 x i64> [[TMP4]], <4 x i32> <i32 4, i32 1, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 2, i32 2, i32 1, i32 3>
+; CHECK-NEXT:    [[TMP7:%.*]] = or <8 x i64> [[TMP3]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = sub <8 x i64> [[TMP3]], [[TMP6]]
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <8 x i64> [[TMP7]], <8 x i64> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 11, i32 12, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp ult <8 x i64> [[TMP9]], zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP10]])
+; CHECK-NEXT:    ret i1 [[TMP11]]
+;
+entry:
+  %0 = shl i64 0, 0
+  %1 = add i64 0, 0
+  %2 = add i64 0, 0
+  %3 = or i64 %2, %1
+  %cmp750 = icmp ult i64 %3, 0
+  %4 = or i64 %0, %1
+  %cmp752 = icmp ult i64 %4, 0
+  %or753 = or i1 %cmp750, %cmp752
+  %5 = or i64 0, %1
+  %cmp754 = icmp ult i64 %5, 0
+  %or755 = or i1 %or753, %cmp754
+  %6 = extractelement <16 x i64> zeroinitializer, i32 0
+  %7 = sub i64 %1, %6
+  %cmp756 = icmp ult i64 %7, 0
+  %or757 = or i1 %or755, %cmp756
+  %8 = sub i64 0, %2
+  %cmp758 = icmp ult i64 %8, 0
+  %or759 = or i1 %or757, %cmp758
+  %9 = or i64 0, %2
+  %cmp760 = icmp ult i64 %9, 0
+  %or761 = or i1 %or759, %cmp760
+  %10 = or i64 0, %6
+  %cmp762 = icmp ult i64 %10, 0
+  %or763 = or i1 %or761, %cmp762
+  %11 = or i64 0, %0
+  %cmp764 = icmp ult i64 %11, 0
+  %or765 = or i1 %or763, %cmp764
+  ret i1 %or765
+}


        


More information about the llvm-commits mailing list