[llvm] 007177b - [SLP]Fix PR61018: Assertion `Mask[I] == UndefMaskElem && "Multiple uses

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 27 10:11:16 PST 2023


Author: Alexey Bataev
Date: 2023-02-27T10:09:48-08:00
New Revision: 007177bdde60a822ccaefc96593e818c2235e2d5

URL: https://github.com/llvm/llvm-project/commit/007177bdde60a822ccaefc96593e818c2235e2d5
DIFF: https://github.com/llvm/llvm-project/commit/007177bdde60a822ccaefc96593e818c2235e2d5.diff

LOG: [SLP]Fix PR61018: Assertion `Mask[I] == UndefMaskElem && "Multiple uses
of scalars."' failed.

Need to check for reused indices when checking whether 2 insertelement
instructions are from the same buildvector. If the indices are reused,
it is better not to match the buildvectors and to consider them different;
otherwise we would need to track the order of the insertelement operations.

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/insertelements-with-reused-indices.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index ab4cfb118bf0..2388969b30ff 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4015,26 +4015,30 @@ static bool areTwoInsertFromSameBuildVector(
   // Go through the vector operand of insertelement instructions trying to find
   // either VU as the original vector for IE2 or V as the original vector for
   // IE1.
+  SmallSet<int, 8> ReusedIdx;
+  bool IsReusedIdx = false;
   do {
-    if (IE2 == VU)
+    if (IE2 == VU && !IE1)
       return VU->hasOneUse();
-    if (IE1 == V)
+    if (IE1 == V && !IE2)
       return V->hasOneUse();
-    if (IE1) {
-      if ((IE1 != VU && !IE1->hasOneUse()) ||
-          getInsertIndex(IE1).value_or(*Idx2) == *Idx2)
+    if (IE1 && IE1 != V) {
+      IsReusedIdx |=
+          !ReusedIdx.insert(getInsertIndex(IE1).value_or(*Idx2)).second;
+      if ((IE1 != VU && !IE1->hasOneUse()) || IsReusedIdx)
         IE1 = nullptr;
       else
         IE1 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE1));
     }
-    if (IE2) {
-      if ((IE2 != V && !IE2->hasOneUse()) ||
-          getInsertIndex(IE2).value_or(*Idx1) == *Idx1)
+    if (IE2 && IE2 != VU) {
+      IsReusedIdx |=
+          !ReusedIdx.insert(getInsertIndex(IE2).value_or(*Idx1)).second;
+      if ((IE2 != V && !IE2->hasOneUse()) || IsReusedIdx)
         IE2 = nullptr;
       else
         IE2 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE2));
     }
-  } while (IE1 || IE2);
+  } while (!IsReusedIdx && (IE1 || IE2));
   return false;
 }
 

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/insertelements-with-reused-indices.ll b/llvm/test/Transforms/SLPVectorizer/X86/insertelements-with-reused-indices.ll
new file mode 100644
index 000000000000..e46c3b94383f
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insertelements-with-reused-indices.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -mtriple=x86_64-unknown-linux-gnu --passes=slp-vectorizer < %s | FileCheck %s
+
+define void @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x float> undef, float 0.000000e+00, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float 0.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 0.000000e+00, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 0.000000e+00, i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = fsub float 0.000000e+00, 0.000000e+00
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x float> zeroinitializer, float [[TMP4]], i32 0
+; CHECK-NEXT:    unreachable
+;
+entry:
+  %0 = fsub float 0.000000e+00, 0.000000e+00
+  %1 = insertelement <4 x float> undef, float %0, i32 0
+  %2 = insertelement <4 x float> %1, float %0, i32 1
+  %3 = fsub float 0.000000e+00, 0.000000e+00
+  %4 = fmul float 0.000000e+00, %3
+  %5 = insertelement <4 x float> %2, float 0.000000e+00, i32 0
+  %6 = insertelement <4 x float> %5, float %4, i32 0
+  %7 = fsub float 0.000000e+00, %0
+  %8 = fsub float %7, %4
+  %9 = insertelement <2 x float> zeroinitializer, float %8, i32 0
+  unreachable
+}


        


More information about the llvm-commits mailing list