[llvm] eb14d2a - [SLP]Fix check for matched gather node, if it is a subvector node

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 24 06:48:53 PST 2025


Author: Alexey Bataev
Date: 2025-02-24T06:48:43-08:00
New Revision: eb14d2a1d48fe76c5faed71a08547135a35ee742

URL: https://github.com/llvm/llvm-project/commit/eb14d2a1d48fe76c5faed71a08547135a35ee742
DIFF: https://github.com/llvm/llvm-project/commit/eb14d2a1d48fe76c5faed71a08547135a35ee742.diff

LOG: [SLP]Fix check for matched gather node, if it is a subvector node

If the gather node is a subvector node, it may match the existing
vector/gather node in the graph, but may still require reordering. In
this case, its dependencies need to be fully checked to prevent a
compiler crash.

Fixes #128401

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/bv-matched-node-reorder.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index bf256d82ae17d..5fc5fb10fad55 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13320,9 +13320,16 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
   Entries.clear();
   // TODO: currently checking only for Scalars in the tree entry, need to count
   // reused elements too for better cost estimation.
-  const EdgeInfo &TEUseEI = TE == VectorizableTree.front().get()
-                                ? EdgeInfo(const_cast<TreeEntry *>(TE), 0)
-                                : TE->UserTreeIndex;
+  auto GetUserEntry = [&](const TreeEntry *TE) {
+    while (TE->UserTreeIndex && TE->UserTreeIndex.EdgeIdx == UINT_MAX)
+      TE = TE->UserTreeIndex.UserTE;
+    if (TE == VectorizableTree.front().get())
+      return EdgeInfo(const_cast<TreeEntry *>(TE), 0);
+    return TE->UserTreeIndex;
+  };
+  const EdgeInfo TEUseEI = GetUserEntry(TE);
+  if (!TEUseEI)
+    return std::nullopt;
   const Instruction *TEInsertPt = &getLastInstructionInBundle(TEUseEI.UserTE);
   const BasicBlock *TEInsertBlock = nullptr;
   // Main node of PHI entries keeps the correct order of operands/incoming
@@ -13874,15 +13881,13 @@ BoUpSLP::isGatherShuffledEntry(
   assert(VL.size() % NumParts == 0 &&
          "Number of scalars must be divisible by NumParts.");
   if (TE->UserTreeIndex && TE->UserTreeIndex.UserTE->isGather() &&
-      TE->UserTreeIndex.EdgeIdx == UINT_MAX) {
-    assert(
-        (TE->Idx == 0 ||
-         (TE->hasState() && TE->getOpcode() == Instruction::ExtractElement) ||
-         isSplat(TE->Scalars) ||
-         getSameValuesTreeEntry(TE->getMainOp(), TE->Scalars)) &&
-        "Expected splat or extractelements only node.");
+      TE->UserTreeIndex.EdgeIdx == UINT_MAX &&
+      (TE->Idx == 0 ||
+       (TE->hasState() && TE->getOpcode() == Instruction::ExtractElement) ||
+       isSplat(TE->Scalars) ||
+       (TE->hasState() &&
+        getSameValuesTreeEntry(TE->getMainOp(), TE->Scalars))))
     return {};
-  }
   unsigned SliceSize = getPartNumElems(VL.size(), NumParts);
   SmallVector<std::optional<TTI::ShuffleKind>> Res;
   for (unsigned Part : seq<unsigned>(NumParts)) {

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/bv-matched-node-reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/bv-matched-node-reorder.ll
new file mode 100644
index 0000000000000..c12918f3bb81d
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/bv-matched-node-reorder.ll
@@ -0,0 +1,114 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+@g = global [8 x i8] zeroinitializer
+
+define i32 @main() {
+; CHECK-LABEL: define i32 @main() {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[L2_I_I_I_I:%.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr @g, i64 5), align 1
+; CHECK-NEXT:    [[LI_I_I_I:%.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr @g, i64 4), align 1
+; CHECK-NEXT:    [[L1_I_I_I_I:%.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr @g, i64 7), align 1
+; CHECK-NEXT:    [[L6_I_I_I_I:%.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr @g, i64 6), align 1
+; CHECK-NEXT:    br i1 false, label %[[IF_END151_1_I_I_1_I_I_I:.*]], label %[[END:.*]]
+; CHECK:       [[PRE:.*]]:
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i8 [ 0, %[[PRE]] ], [ [[LI_I_I_I]], %[[ENTRY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i8 [ 0, %[[PRE]] ], [ [[L2_I_I_I_I]], %[[ENTRY]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i8 [ 0, %[[PRE]] ], [ [[L6_I_I_I_I]], %[[ENTRY]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i8 [ 0, %[[PRE]] ], [ [[L1_I_I_I_I]], %[[ENTRY]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i8 [ 0, %[[PRE]] ], [ [[L1_I_I_I_I]], %[[ENTRY]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = phi i8 [ 0, %[[PRE]] ], [ [[L6_I_I_I_I]], %[[ENTRY]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = phi i8 [ 0, %[[PRE]] ], [ [[L2_I_I_I_I]], %[[ENTRY]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = phi i8 [ 0, %[[PRE]] ], [ [[LI_I_I_I]], %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[IF_END151_1_I_I_1_I_I_I]]
+; CHECK:       [[IF_END151_1_I_I_1_I_I_I]]:
+; CHECK-NEXT:    [[TMP8:%.*]] = phi i8 [ [[LI_I_I_I]], %[[ENTRY]] ], [ [[TMP7]], %[[END]] ]
+; CHECK-NEXT:    [[TMP9:%.*]] = phi i8 [ [[L2_I_I_I_I]], %[[ENTRY]] ], [ [[TMP6]], %[[END]] ]
+; CHECK-NEXT:    [[TMP10:%.*]] = phi i8 [ [[L6_I_I_I_I]], %[[ENTRY]] ], [ [[TMP5]], %[[END]] ]
+; CHECK-NEXT:    [[TMP11:%.*]] = phi i8 [ [[L1_I_I_I_I]], %[[ENTRY]] ], [ [[TMP4]], %[[END]] ]
+; CHECK-NEXT:    [[TMP12:%.*]] = phi i8 [ [[L1_I_I_I_I]], %[[ENTRY]] ], [ [[TMP3]], %[[END]] ]
+; CHECK-NEXT:    [[TMP13:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[TMP2]], %[[END]] ]
+; CHECK-NEXT:    [[TMP14:%.*]] = phi i8 [ [[L2_I_I_I_I]], %[[ENTRY]] ], [ [[TMP1]], %[[END]] ]
+; CHECK-NEXT:    [[TMP15:%.*]] = phi i8 [ [[LI_I_I_I]], %[[ENTRY]] ], [ [[TMP0]], %[[END]] ]
+; CHECK-NEXT:    [[TMP16:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[TMP6]], %[[END]] ]
+; CHECK-NEXT:    [[TMP17:%.*]] = phi i8 [ [[L1_I_I_I_I]], %[[ENTRY]] ], [ [[TMP4]], %[[END]] ]
+; CHECK-NEXT:    [[TMP18:%.*]] = phi i8 [ [[L6_I_I_I_I]], %[[ENTRY]] ], [ [[TMP5]], %[[END]] ]
+; CHECK-NEXT:    [[TMP19:%.*]] = phi i8 [ [[L1_I_I_I_I]], %[[ENTRY]] ], [ [[TMP4]], %[[END]] ]
+; CHECK-NEXT:    [[TMP20:%.*]] = phi i8 [ [[L6_I_I_I_I]], %[[ENTRY]] ], [ [[TMP5]], %[[END]] ]
+; CHECK-NEXT:    [[TMP21:%.*]] = phi i8 [ [[L2_I_I_I_I]], %[[ENTRY]] ], [ [[TMP6]], %[[END]] ]
+; CHECK-NEXT:    [[TMP22:%.*]] = phi i8 [ [[LI_I_I_I]], %[[ENTRY]] ], [ [[TMP7]], %[[END]] ]
+; CHECK-NEXT:    [[TOBOOL_NOT_I_1121_I_1_I_I_I:%.*]] = icmp ne i8 [[TMP18]], 0
+; CHECK-NEXT:    [[C1_1_1117_I_1_I9_I_I:%.*]] = icmp eq i8 [[TMP17]], 0
+; CHECK-NEXT:    [[C1_I_1_I_I_I:%.*]] = icmp eq i8 [[TMP16]], 0
+; CHECK-NEXT:    [[C1_1_I_I_I_I:%.*]] = icmp eq i8 [[TMP12]], 0
+; CHECK-NEXT:    [[CMP258_I_1_I_I_I_I:%.*]] = icmp eq i8 [[TMP13]], 0
+; CHECK-NEXT:    [[C1_I_I_I_I:%.*]] = icmp eq i8 [[TMP14]], 0
+; CHECK-NEXT:    [[CMP258_I_I_I_I_I:%.*]] = icmp eq i8 [[TMP15]], 0
+; CHECK-NEXT:    [[C1_1_1_I_I_I_I:%.*]] = icmp eq i8 [[TMP11]], 0
+; CHECK-NEXT:    [[CMP258_I_1_1_I_I_I_I:%.*]] = icmp eq i8 [[TMP10]], 0
+; CHECK-NEXT:    [[C1_187_I_I_I_I:%.*]] = icmp eq i8 [[TMP9]], 0
+; CHECK-NEXT:    [[CMP258_I_185_I_I_I_I:%.*]] = icmp eq i8 [[TMP8]], 0
+; CHECK-NEXT:    [[C1_1_1117_I_I_I_I:%.*]] = icmp eq i8 [[TMP19]], 0
+; CHECK-NEXT:    [[CMP258_I_1_1115_I_I_I_I:%.*]] = icmp eq i8 [[TMP20]], 0
+; CHECK-NEXT:    [[C1_1113_I_I_I_I:%.*]] = icmp eq i8 [[TMP21]], 0
+; CHECK-NEXT:    [[CMP258_I_1111_I_I_I_I:%.*]] = icmp eq i8 [[TMP22]], 0
+; CHECK-NEXT:    [[C1_187_1_I_I_I_I:%.*]] = icmp eq i8 [[TMP16]], 0
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %l2.i.i.i.i = load i8, ptr getelementptr inbounds nuw (i8, ptr @g, i64 5), align 1
+  %li.i.i.i = load i8, ptr getelementptr inbounds nuw (i8, ptr @g, i64 4), align 1
+  %l1.i.i.i.i = load i8, ptr getelementptr inbounds nuw (i8, ptr @g, i64 7), align 1
+  %l6.i.i.i.i = load i8, ptr getelementptr inbounds nuw (i8, ptr @g, i64 6), align 1
+  br i1 false, label %if.end151.1.i.i.1.i.i.i, label %end
+
+pre:
+  br label %end
+
+end:
+  %0 = phi i8 [ 0, %pre ], [ %li.i.i.i, %entry ]
+  %1 = phi i8 [ 0, %pre ], [ %l2.i.i.i.i, %entry ]
+  %2 = phi i8 [ 0, %pre ], [ %l6.i.i.i.i, %entry ]
+  %3 = phi i8 [ 0, %pre ], [ %l1.i.i.i.i, %entry ]
+  %4 = phi i8 [ 0, %pre ], [ %l1.i.i.i.i, %entry ]
+  %5 = phi i8 [ 0, %pre ], [ %l6.i.i.i.i, %entry ]
+  %6 = phi i8 [ 0, %pre ], [ %l2.i.i.i.i, %entry ]
+  %7 = phi i8 [ 0, %pre ], [ %li.i.i.i, %entry ]
+  br label %if.end151.1.i.i.1.i.i.i
+
+if.end151.1.i.i.1.i.i.i:
+  %8 = phi i8 [ %li.i.i.i, %entry ], [ %7, %end ]
+  %9 = phi i8 [ %l2.i.i.i.i, %entry ], [ %6, %end ]
+  %10 = phi i8 [ %l6.i.i.i.i, %entry ], [ %5, %end ]
+  %11 = phi i8 [ %l1.i.i.i.i, %entry ], [ %4, %end ]
+  %12 = phi i8 [ %l1.i.i.i.i, %entry ], [ %3, %end ]
+  %13 = phi i8 [ 0, %entry ], [ %2, %end ]
+  %14 = phi i8 [ %l2.i.i.i.i, %entry ], [ %1, %end ]
+  %15 = phi i8 [ %li.i.i.i, %entry ], [ %0, %end ]
+  %16 = phi i8 [ 0, %entry ], [ %6, %end ]
+  %17 = phi i8 [ %l1.i.i.i.i, %entry ], [ %4, %end ]
+  %18 = phi i8 [ %l6.i.i.i.i, %entry ], [ %5, %end ]
+  %19 = phi i8 [ %l1.i.i.i.i, %entry ], [ %4, %end ]
+  %20 = phi i8 [ %l6.i.i.i.i, %entry ], [ %5, %end ]
+  %21 = phi i8 [ %l2.i.i.i.i, %entry ], [ %6, %end ]
+  %22 = phi i8 [ %li.i.i.i, %entry ], [ %7, %end ]
+  %tobool.not.i.1121.i.1.i.i.i = icmp ne i8 %18, 0
+  %c1.1.1117.i.1.i9.i.i = icmp eq i8 %17, 0
+  %c1.i.1.i.i.i = icmp eq i8 %16, 0
+  %c1.1.i.i.i.i = icmp eq i8 %12, 0
+  %cmp258.i.1.i.i.i.i = icmp eq i8 %13, 0
+  %c1.i.i.i.i = icmp eq i8 %14, 0
+  %cmp258.i.i.i.i.i = icmp eq i8 %15, 0
+  %c1.1.1.i.i.i.i = icmp eq i8 %11, 0
+  %cmp258.i.1.1.i.i.i.i = icmp eq i8 %10, 0
+  %c1.187.i.i.i.i = icmp eq i8 %9, 0
+  %cmp258.i.185.i.i.i.i = icmp eq i8 %8, 0
+  %c1.1.1117.i.i.i.i = icmp eq i8 %19, 0
+  %cmp258.i.1.1115.i.i.i.i = icmp eq i8 %20, 0
+  %c1.1113.i.i.i.i = icmp eq i8 %21, 0
+  %cmp258.i.1111.i.i.i.i = icmp eq i8 %22, 0
+  %c1.187.1.i.i.i.i = icmp eq i8 %16, 0
+  ret i32 0
+}


        


More information about the llvm-commits mailing list