[llvm] fe49499 - [SLP]Fix PR55796: insert point for extractelements from different basic blocks.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 1 09:46:59 PDT 2022
Author: Alexey Bataev
Date: 2022-06-01T09:44:53-07:00
New Revision: fe4949942d1cdb7dbfbc96bf0f0ef123308742ed
URL: https://github.com/llvm/llvm-project/commit/fe4949942d1cdb7dbfbc96bf0f0ef123308742ed
DIFF: https://github.com/llvm/llvm-project/commit/fe4949942d1cdb7dbfbc96bf0f0ef123308742ed.diff
LOG: [SLP]Fix PR55796: insert point for extractelements from different basic blocks.
Extractelement instructions may come from different basic blocks, need
to take it into account when looking for a last instruction in the
bundle to prevent compiler crash.
Differential Revision: https://reviews.llvm.org/D126777
Added:
llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index db8f97271db63..03e1959fc1e8d 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7232,23 +7232,47 @@ void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
// Get the basic block this bundle is in. All instructions in the bundle
- // should be in this block.
+ // should be in this block (except for extractelement-like instructions with
+ // constant indices).
auto *Front = E->getMainOp();
auto *BB = Front->getParent();
assert(llvm::all_of(E->Scalars, [=](Value *V) -> bool {
auto *I = cast<Instruction>(V);
- return !E->isOpcodeOrAlt(I) || I->getParent() == BB;
+ return !E->isOpcodeOrAlt(I) || I->getParent() == BB ||
+ isVectorLikeInstWithConstOps(I);
}));
- auto &&FindLastInst = [E, Front]() {
+ auto &&FindLastInst = [E, Front, this, &BB]() {
Instruction *LastInst = Front;
for (Value *V : E->Scalars) {
auto *I = dyn_cast<Instruction>(V);
if (!I)
continue;
- if (LastInst->comesBefore(I))
+ if (LastInst->getParent() == I->getParent()) {
+ if (LastInst->comesBefore(I))
+ LastInst = I;
+ continue;
+ }
+ assert(isVectorLikeInstWithConstOps(LastInst) &&
+ isVectorLikeInstWithConstOps(I) &&
+ "Expected vector-like insts only.");
+ if (!DT->isReachableFromEntry(LastInst->getParent())) {
+ LastInst = I;
+ continue;
+ }
+ if (!DT->isReachableFromEntry(I->getParent()))
+ continue;
+ auto *NodeA = DT->getNode(LastInst->getParent());
+ auto *NodeB = DT->getNode(I->getParent());
+ assert(NodeA && "Should only process reachable instructions");
+ assert(NodeB && "Should only process reachable instructions");
+ assert((NodeA == NodeB) ==
+ (NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) &&
+ "Different nodes should have different DFS numbers");
+ if (NodeA->getDFSNumIn() < NodeB->getDFSNumIn())
LastInst = I;
}
+ BB = LastInst->getParent();
return LastInst;
};
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll
new file mode 100644
index 0000000000000..e03e335d55f46
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll
@@ -0,0 +1,77 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -slp-vectorizer -mtriple=x86_64-unknown-linux -mattr="-avx512pf,+avx512f,+avx512bw" -slp-threshold=-100 -slp-min-tree-size=0 < %s | FileCheck %s
+
+define i32 @foo(i32 %a) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[A:%.*]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <2 x i32> zeroinitializer, [[TMP0]]
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 0
+; CHECK-NEXT: br i1 false, label [[BB5:%.*]], label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[TMP3]], i32 1
+; CHECK-NEXT: [[SHUFFLE13:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[SHUFFLE13]])
+; CHECK-NEXT: [[OP_RDX14:%.*]] = add i32 [[TMP6]], 0
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: [[P1:%.*]] = phi i32 [ [[OP_RDX14]], [[BB1]] ], [ 0, [[BB2:%.*]] ]
+; CHECK-NEXT: ret i32 0
+; CHECK: bb4:
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0
+; CHECK-NEXT: [[SHUFFLE9:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[SHUFFLE]], [[SHUFFLE9]]
+; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP8]])
+; CHECK-NEXT: [[OP_RDX10:%.*]] = add i32 [[TMP9]], 0
+; CHECK-NEXT: [[OP_RDX11:%.*]] = add i32 [[OP_RDX10]], [[TMP2]]
+; CHECK-NEXT: [[OP_RDX12:%.*]] = add i32 [[TMP10]], [[OP_RDX11]]
+; CHECK-NEXT: ret i32 [[OP_RDX12]]
+; CHECK: bb5:
+; CHECK-NEXT: br label [[BB4:%.*]]
+;
+entry:
+ %0 = sub nsw i32 0, %a
+ %local = sub nsw i32 0, 0
+ br i1 false, label %bb5, label %bb1
+
+bb1:
+ %1 = add i32 %0, %local
+ %2 = add i32 %1, 0
+ %3 = add i32 %2, %local
+ %4 = add i32 %3, 0
+ %5 = add i32 %4, %local
+ br label %bb3
+
+bb2:
+ br label %bb3
+
+bb3:
+ %p1 = phi i32 [ %5, %bb1 ], [ 0, %bb2 ]
+ ret i32 0
+
+bb4:
+ %6 = add i32 %0, %local
+ %7 = add i32 %6, %local
+ %8 = add i32 %7, 0
+ %9 = add i32 %8, %local
+ %10 = add i32 %9, 0
+ %11 = add i32 %10, %local
+ %12 = add i32 %11, 0
+ %13 = add i32 %12, %local
+ %14 = add i32 %13, 0
+ %15 = add i32 %14, %local
+ %16 = add i32 %15, 0
+ %17 = add i32 %16, %local
+ %18 = add i32 %17, 0
+ %19 = add i32 %18, %local
+ ret i32 %19
+
+bb5:
+ br label %bb4
+}
More information about the llvm-commits mailing list