[llvm] 9570364 - [SLP]Fix PR72202: wrong mask emission for the first found vector

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 16 07:04:08 PST 2023


Author: Alexey Bataev
Date: 2023-11-16T07:01:05-08:00
New Revision: 95703642e3ef617275fd80b5316b05c5a09c6219

URL: https://github.com/llvm/llvm-project/commit/95703642e3ef617275fd80b5316b05c5a09c6219
DIFF: https://github.com/llvm/llvm-project/commit/95703642e3ef617275fd80b5316b05c5a09c6219.diff

LOG: [SLP]Fix PR72202: wrong mask emission for the first found vector
operand.

Need to copy the submask not to the very first part of the common
extractelements vector mask, but to the proper one to avoid wrong code
emission.

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/X86/extractelements-vector-ops-shuffle.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d8208cabe6dde0d..038c9fc9a7938d7 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10221,7 +10221,7 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
                                       });
                                     })) &&
                "Expected first part or all previous parts masked.");
-        copy(SubMask, VecMask.begin());
+        copy(SubMask, std::next(VecMask.begin(), Part * SliceSize));
       } else {
         unsigned VF = cast<FixedVectorType>(Vec->getType())->getNumElements();
         if (Vec->getType() != SubVec->getType()) {

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelements-vector-ops-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelements-vector-ops-shuffle.ll
index 85bc16bbe043d5c..0222e0aaeea3ed0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extractelements-vector-ops-shuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelements-vector-ops-shuffle.ll
@@ -7,23 +7,24 @@ define double @test() {
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x double>, ptr getelementptr inbounds ([13 x double], ptr null, i64 0, i64 5), align 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr getelementptr inbounds ([13 x double], ptr null, i64 0, i64 8), align 16
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x i32> <i32 1, i32 3>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP4:%.*]] = fmul <4 x double> zeroinitializer, [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = call reassoc nsz double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> [[TMP4]])
-; CHECK-NEXT:    [[TMP6:%.*]] = fmul double [[TMP5]], 0.000000e+00
-; CHECK-NEXT:    store double [[TMP6]], ptr null, align 16
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> <double 0.000000e+00, double 0.000000e+00, double poison, double poison>, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul <4 x double> zeroinitializer, [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call reassoc nsz double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> [[TMP5]])
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul double [[TMP6]], 0.000000e+00
+; CHECK-NEXT:    store double [[TMP7]], ptr null, align 16
 ; CHECK-NEXT:    br label [[BB:%.*]]
 ; CHECK:       bb:
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul <2 x double> [[TMP1]], zeroinitializer
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0
-; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1
-; CHECK-NEXT:    [[TMP10:%.*]] = fadd double [[TMP8]], [[TMP9]]
-; CHECK-NEXT:    [[TMP11:%.*]] = fmul <2 x double> [[TMP0]], zeroinitializer
-; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[TMP11]], i32 1
-; CHECK-NEXT:    [[TMP13:%.*]] = fadd double [[TMP12]], [[TMP10]]
-; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[TMP11]], i32 0
-; CHECK-NEXT:    [[TMP15:%.*]] = fadd double [[TMP14]], [[TMP13]]
-; CHECK-NEXT:    ret double [[TMP15]]
+; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1
+; CHECK-NEXT:    [[TMP11:%.*]] = fadd double [[TMP9]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = fmul <2 x double> [[TMP0]], zeroinitializer
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x double> [[TMP12]], i32 1
+; CHECK-NEXT:    [[TMP14:%.*]] = fadd double [[TMP13]], [[TMP11]]
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x double> [[TMP12]], i32 0
+; CHECK-NEXT:    [[TMP16:%.*]] = fadd double [[TMP15]], [[TMP14]]
+; CHECK-NEXT:    ret double [[TMP16]]
 ;
 entry:
   %0 = fmul double 0.000000e+00, 0.000000e+00


        


More information about the llvm-commits mailing list