[PATCH] D155275: [SLP] Set ExtendingManyInputs argument of addMask based on size of InVectors (PR63668)
    Vedant Paranjape via Phabricator via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Fri Jul 14 01:56:52 PDT 2023
    
    
  
vedant-amd created this revision.
Herald added subscribers: vporpo, hiraditya.
Herald added a project: All.
vedant-amd requested review of this revision.
Herald added subscribers: llvm-commits, wangpc.
Herald added a project: LLVM.
In the finalize function, addMask is always called with
ExtendingManyInputs=true, but this is not always correct. When the size of
InVectors is 1, there is only one input, and passing this option
triggers an assertion in the addMask function:
void addMask(llvm::SmallVectorImpl<int>&, llvm::ArrayRef<int>, bool):
Assertion `(!ExtendingManyInputs || SubMask.size() > Mask.size()) &&
"SubMask with many inputs support must be larger than the mask."'
failed.
This patch adds a check on the size of InVectors to set the
ExtendingManyInputs flag, and also adds a test case that exhibits the issue.
The issue was reported in PR#63668, which this patch fixes.
Repository:
  rG LLVM Github Monorepo
https://reviews.llvm.org/D155275
Files:
  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
  llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll
Index: llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=znver4 -S < %s | FileCheck %s
+
+define internal i32 @testfunc() {
+; CHECK-LABEL: define internal i32 @testfunc
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    br label [[TMP1:%.*]]
+; CHECK:       1:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi float [ 0.000000e+00, [[TMP0:%.*]] ], [ 0.000000e+00, [[TMP8:%.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi float [ 0.000000e+00, [[TMP0]] ], [ 0.000000e+00, [[TMP8]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi float [ 0.000000e+00, [[TMP0]] ], [ 0.000000e+00, [[TMP8]] ]
+; CHECK-NEXT:    br i1 false, label [[TMP8]], label [[TMP5:%.*]]
+; CHECK:       5:
+; CHECK-NEXT:    br i1 false, label [[TMP6:%.*]], label [[TMP8]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x float> zeroinitializer, i64 0
+; CHECK-NEXT:    br label [[TMP8]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = phi float [ [[TMP7]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ]
+; CHECK-NEXT:    [[TMP10:%.*]] = phi float [ [[TMP2]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ [[TMP2]], [[TMP5]] ]
+; CHECK-NEXT:    [[TMP11:%.*]] = phi float [ [[TMP7]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ]
+; CHECK-NEXT:    [[TMP12:%.*]] = phi float [ [[TMP7]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ]
+; CHECK-NEXT:    [[TMP13:%.*]] = phi float [ [[TMP7]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ]
+; CHECK-NEXT:    [[TMP14:%.*]] = phi float [ [[TMP3]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ]
+; CHECK-NEXT:    [[TMP15:%.*]] = phi float [ [[TMP4]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ]
+; CHECK-NEXT:    [[TMP16:%.*]] = phi float [ [[TMP4]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ [[TMP3]], [[TMP5]] ]
+; CHECK-NEXT:    br label [[TMP1]]
+;
+  br label %1
+
+1:                                                ; preds = %8, %0
+  %2 = phi float [ 0.000000e+00, %0 ], [ 0.000000e+00, %8 ]
+  %3 = phi float [ 0.000000e+00, %0 ], [ 0.000000e+00, %8 ]
+  %4 = phi float [ 0.000000e+00, %0 ], [ 0.000000e+00, %8 ]
+  br i1 false, label %8, label %5
+
+5:                                                ; preds = %1
+  br i1 false, label %6, label %8
+
+6:                                                ; preds = %5
+  %7 = extractelement <8 x float> zeroinitializer, i64 0
+  br label %8
+
+8:                                                ; preds = %6, %5, %1
+  %9 = phi float [ %7, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ]
+  %10 = phi float [ %2, %6 ], [ 0.000000e+00, %1 ], [ %2, %5 ]
+  %11 = phi float [ %7, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ]
+  %12 = phi float [ %7, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ]
+  %13 = phi float [ %7, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ]
+  %14 = phi float [ %3, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ]
+  %15 = phi float [ %4, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ]
+  %16 = phi float [ %4, %6 ], [ 0.000000e+00, %1 ], [ %3, %5 ]
+  br label %1
+}
Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7159,6 +7159,7 @@
   finalize(ArrayRef<int> ExtMask, unsigned VF = 0,
            function_ref<void(Value *&, SmallVectorImpl<int> &)> Action = {}) {
     IsFinalized = true;
+    bool ShouldExtendManyInputs = (InVectors.size() > 1);
     if (Action) {
       const PointerUnion<Value *, const TreeEntry *> &Vec = InVectors.front();
       if (InVectors.size() == 2) {
@@ -7179,7 +7180,7 @@
             CommonMask.size()));
       Action(V, CommonMask);
     }
-    ::addMask(CommonMask, ExtMask, /*ExtendingManyInputs=*/true);
+    ::addMask(CommonMask, ExtMask, /*ExtendingManyInputs=*/ShouldExtendManyInputs);
     if (CommonMask.empty())
       return Cost;
     int Limit = CommonMask.size() * 2;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D155275.540325.patch
Type: text/x-patch
Size: 4356 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230714/5a365699/attachment.bin>
    
    
More information about the llvm-commits
mailing list