[llvm] fe49499 - [SLP]Fix PR55796: insert point for extractelements from different basic blocks.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 1 09:46:59 PDT 2022


Author: Alexey Bataev
Date: 2022-06-01T09:44:53-07:00
New Revision: fe4949942d1cdb7dbfbc96bf0f0ef123308742ed

URL: https://github.com/llvm/llvm-project/commit/fe4949942d1cdb7dbfbc96bf0f0ef123308742ed
DIFF: https://github.com/llvm/llvm-project/commit/fe4949942d1cdb7dbfbc96bf0f0ef123308742ed.diff

LOG: [SLP]Fix PR55796: insert point for extractelements from different basic blocks.

Extractelement instructions in a bundle may come from different basic
blocks. This must be taken into account when looking for the last
instruction in the bundle, to prevent a compiler crash.

Differential Revision: https://reviews.llvm.org/D126777

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index db8f97271db63..03e1959fc1e8d 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7232,23 +7232,47 @@ void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
 
 void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
   // Get the basic block this bundle is in. All instructions in the bundle
-  // should be in this block.
+  // should be in this block (except for extractelement-like instructions with
+  // constant indices).
   auto *Front = E->getMainOp();
   auto *BB = Front->getParent();
   assert(llvm::all_of(E->Scalars, [=](Value *V) -> bool {
     auto *I = cast<Instruction>(V);
-    return !E->isOpcodeOrAlt(I) || I->getParent() == BB;
+    return !E->isOpcodeOrAlt(I) || I->getParent() == BB ||
+           isVectorLikeInstWithConstOps(I);
   }));
 
-  auto &&FindLastInst = [E, Front]() {
+  auto &&FindLastInst = [E, Front, this, &BB]() {
     Instruction *LastInst = Front;
     for (Value *V : E->Scalars) {
       auto *I = dyn_cast<Instruction>(V);
       if (!I)
         continue;
-      if (LastInst->comesBefore(I))
+      if (LastInst->getParent() == I->getParent()) {
+        if (LastInst->comesBefore(I))
+          LastInst = I;
+        continue;
+      }
+      assert(isVectorLikeInstWithConstOps(LastInst) &&
+             isVectorLikeInstWithConstOps(I) &&
+             "Expected vector-like insts only.");
+      if (!DT->isReachableFromEntry(LastInst->getParent())) {
+        LastInst = I;
+        continue;
+      }
+      if (!DT->isReachableFromEntry(I->getParent()))
+        continue;
+      auto *NodeA = DT->getNode(LastInst->getParent());
+      auto *NodeB = DT->getNode(I->getParent());
+      assert(NodeA && "Should only process reachable instructions");
+      assert(NodeB && "Should only process reachable instructions");
+      assert((NodeA == NodeB) ==
+                 (NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) &&
+             "Different nodes should have different DFS numbers");
+      if (NodeA->getDFSNumIn() < NodeB->getDFSNumIn())
         LastInst = I;
     }
+    BB = LastInst->getParent();
     return LastInst;
   };
 

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll
new file mode 100644
index 0000000000000..e03e335d55f46
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll
@@ -0,0 +1,77 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -slp-vectorizer -mtriple=x86_64-unknown-linux -mattr="-avx512pf,+avx512f,+avx512bw" -slp-threshold=-100 -slp-min-tree-size=0 < %s | FileCheck %s
+
+define i32 @foo(i32 %a) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[A:%.*]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = sub nsw <2 x i32> zeroinitializer, [[TMP0]]
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 0
+; CHECK-NEXT:    br i1 false, label [[BB5:%.*]], label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[TMP3]], i32 1
+; CHECK-NEXT:    [[SHUFFLE13:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[SHUFFLE13]])
+; CHECK-NEXT:    [[OP_RDX14:%.*]] = add i32 [[TMP6]], 0
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[P1:%.*]] = phi i32 [ [[OP_RDX14]], [[BB1]] ], [ 0, [[BB2:%.*]] ]
+; CHECK-NEXT:    ret i32 0
+; CHECK:       bb4:
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0
+; CHECK-NEXT:    [[SHUFFLE9:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = add <4 x i32> [[SHUFFLE]], [[SHUFFLE9]]
+; CHECK-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP8]])
+; CHECK-NEXT:    [[OP_RDX10:%.*]] = add i32 [[TMP9]], 0
+; CHECK-NEXT:    [[OP_RDX11:%.*]] = add i32 [[OP_RDX10]], [[TMP2]]
+; CHECK-NEXT:    [[OP_RDX12:%.*]] = add i32 [[TMP10]], [[OP_RDX11]]
+; CHECK-NEXT:    ret i32 [[OP_RDX12]]
+; CHECK:       bb5:
+; CHECK-NEXT:    br label [[BB4:%.*]]
+;
+entry:
+  %0 = sub nsw i32 0, %a
+  %local = sub nsw i32 0, 0
+  br i1 false, label %bb5, label %bb1
+
+bb1:
+  %1 = add i32 %0, %local
+  %2 = add i32 %1, 0
+  %3 = add i32 %2, %local
+  %4 = add i32 %3, 0
+  %5 = add i32 %4, %local
+  br label %bb3
+
+bb2:
+  br label %bb3
+
+bb3:
+  %p1 = phi i32 [ %5, %bb1 ], [ 0, %bb2 ]
+  ret i32 0
+
+bb4:
+  %6 = add i32 %0, %local
+  %7 = add i32 %6, %local
+  %8 = add i32 %7, 0
+  %9 = add i32 %8, %local
+  %10 = add i32 %9, 0
+  %11 = add i32 %10, %local
+  %12 = add i32 %11, 0
+  %13 = add i32 %12, %local
+  %14 = add i32 %13, 0
+  %15 = add i32 %14, %local
+  %16 = add i32 %15, 0
+  %17 = add i32 %16, %local
+  %18 = add i32 %17, 0
+  %19 = add i32 %18, %local
+  ret i32 %19
+
+bb5:
+  br label %bb4
+}


        


More information about the llvm-commits mailing list