[PATCH] D155275: [SLP] Set ExtendingManyInputs argument of addMask based on size of InVectors (PR63668)

Vedant Paranjape via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 14 01:56:52 PDT 2023


vedant-amd created this revision.
Herald added subscribers: vporpo, hiraditya.
Herald added a project: All.
vedant-amd requested review of this revision.
Herald added subscribers: llvm-commits, wangpc.
Herald added a project: LLVM.

In the finalize function, addMask is always called with
ExtendingManyInputs=true, but this need not always be the case. When the
size of InVectors is 1, there is just one input, and passing this option
triggers an assert in the addMask function:

void addMask(llvm::SmallVectorImpl<int>&, llvm::ArrayRef<int>, bool):
Assertion `(!ExtendingManyInputs || SubMask.size() > Mask.size()) &&
"SubMask with many inputs support must be larger than the mask."'
failed.

This patch adds a check on the size of InVectors to set the
ExtendingManyInputs flag, and also adds a test case that exhibits the issue.
The issue was also reported as PR#63668, which this patch fixes.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D155275

Files:
  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
  llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll


Index: llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=znver4 -S < %s | FileCheck %s
+
+define internal i32 @testfunc() {
+; CHECK-LABEL: define internal i32 @testfunc
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    br label [[TMP1:%.*]]
+; CHECK:       1:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi float [ 0.000000e+00, [[TMP0:%.*]] ], [ 0.000000e+00, [[TMP8:%.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi float [ 0.000000e+00, [[TMP0]] ], [ 0.000000e+00, [[TMP8]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi float [ 0.000000e+00, [[TMP0]] ], [ 0.000000e+00, [[TMP8]] ]
+; CHECK-NEXT:    br i1 false, label [[TMP8]], label [[TMP5:%.*]]
+; CHECK:       5:
+; CHECK-NEXT:    br i1 false, label [[TMP6:%.*]], label [[TMP8]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x float> zeroinitializer, i64 0
+; CHECK-NEXT:    br label [[TMP8]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = phi float [ [[TMP7]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ]
+; CHECK-NEXT:    [[TMP10:%.*]] = phi float [ [[TMP2]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ [[TMP2]], [[TMP5]] ]
+; CHECK-NEXT:    [[TMP11:%.*]] = phi float [ [[TMP7]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ]
+; CHECK-NEXT:    [[TMP12:%.*]] = phi float [ [[TMP7]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ]
+; CHECK-NEXT:    [[TMP13:%.*]] = phi float [ [[TMP7]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ]
+; CHECK-NEXT:    [[TMP14:%.*]] = phi float [ [[TMP3]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ]
+; CHECK-NEXT:    [[TMP15:%.*]] = phi float [ [[TMP4]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ]
+; CHECK-NEXT:    [[TMP16:%.*]] = phi float [ [[TMP4]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ [[TMP3]], [[TMP5]] ]
+; CHECK-NEXT:    br label [[TMP1]]
+;
+  br label %1
+
+1:                                                ; preds = %8, %0
+  %2 = phi float [ 0.000000e+00, %0 ], [ 0.000000e+00, %8 ]
+  %3 = phi float [ 0.000000e+00, %0 ], [ 0.000000e+00, %8 ]
+  %4 = phi float [ 0.000000e+00, %0 ], [ 0.000000e+00, %8 ]
+  br i1 false, label %8, label %5
+
+5:                                                ; preds = %1
+  br i1 false, label %6, label %8
+
+6:                                                ; preds = %5
+  %7 = extractelement <8 x float> zeroinitializer, i64 0
+  br label %8
+
+8:                                                ; preds = %6, %5, %1
+  %9 = phi float [ %7, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ]
+  %10 = phi float [ %2, %6 ], [ 0.000000e+00, %1 ], [ %2, %5 ]
+  %11 = phi float [ %7, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ]
+  %12 = phi float [ %7, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ]
+  %13 = phi float [ %7, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ]
+  %14 = phi float [ %3, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ]
+  %15 = phi float [ %4, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ]
+  %16 = phi float [ %4, %6 ], [ 0.000000e+00, %1 ], [ %3, %5 ]
+  br label %1
+}
Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7159,6 +7159,7 @@
   finalize(ArrayRef<int> ExtMask, unsigned VF = 0,
            function_ref<void(Value *&, SmallVectorImpl<int> &)> Action = {}) {
     IsFinalized = true;
+    bool ShouldExtendManyInputs = (InVectors.size() > 1);
     if (Action) {
       const PointerUnion<Value *, const TreeEntry *> &Vec = InVectors.front();
       if (InVectors.size() == 2) {
@@ -7179,7 +7180,7 @@
             CommonMask.size()));
       Action(V, CommonMask);
     }
-    ::addMask(CommonMask, ExtMask, /*ExtendingManyInputs=*/true);
+    ::addMask(CommonMask, ExtMask, /*ExtendingManyInputs=*/ShouldExtendManyInputs);
     if (CommonMask.empty())
       return Cost;
     int Limit = CommonMask.size() * 2;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D155275.540325.patch
Type: text/x-patch
Size: 4356 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230714/5a365699/attachment.bin>


More information about the llvm-commits mailing list