[llvm] 2f8f17c - [SLP]Fix PR58956: fix insertpoint for reduced buildvector graphs.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 16 07:39:53 PST 2022
Author: Alexey Bataev
Date: 2022-11-16T07:38:49-08:00
New Revision: 2f8f17c157825bd2310b4e5725f0f9d75996ef52
URL: https://github.com/llvm/llvm-project/commit/2f8f17c157825bd2310b4e5725f0f9d75996ef52
DIFF: https://github.com/llvm/llvm-project/commit/2f8f17c157825bd2310b4e5725f0f9d75996ef52.diff
LOG: [SLP]Fix PR58956: fix insertpoint for reduced buildvector graphs.
If the graph consists only of a buildvector node without a main operation,
we need to inherit the insert point from the reduction instruction. Otherwise,
the compiler crashes trying to insert instructions at the entry block.
Added:
llvm/test/Transforms/SLPVectorizer/AArch64/buildvector-reduce.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4b72f7914836d..06be11faeaced 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -970,7 +970,8 @@ class BoUpSLP {
/// Vectorize the tree but with the list of externally used values \p
/// ExternallyUsedValues. Values in this MapVector can be replaced but the
/// generated extractvalue instructions.
- Value *vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues);
+ Value *vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues,
+ Instruction *ReductionRoot = nullptr);
/// \returns the cost incurred by unwanted spills and fills, caused by
/// holding live values over call sites.
@@ -9002,8 +9003,8 @@ struct ShuffledInsertData {
};
} // namespace
-Value *
-BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
+Value *BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues,
+ Instruction *ReductionRoot) {
// All blocks must be scheduled before any instructions are inserted.
for (auto &BSIter : BlocksSchedules) {
scheduleBlock(BSIter.second.get());
@@ -9020,7 +9021,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
EntryToLastInstruction.try_emplace(E.get(), LastInst);
}
- Builder.SetInsertPoint(&F->getEntryBlock().front());
+ Builder.SetInsertPoint(ReductionRoot ? ReductionRoot
+ : &F->getEntryBlock().front());
auto *VectorRoot = vectorizeTree(VectorizableTree[0].get());
// If the vectorized tree can be rewritten in a smaller type, we truncate the
@@ -11944,16 +11946,18 @@ class HorizontalReduction {
Builder.setFastMathFlags(RdxFMF);
- // Vectorize a tree.
- Value *VectorizedRoot = V.vectorizeTree(LocalExternallyUsedValues);
-
// Emit a reduction. If the root is a select (min/max idiom), the insert
// point is the compare condition of that select.
Instruction *RdxRootInst = cast<Instruction>(ReductionRoot);
+ Instruction *InsertPt = RdxRootInst;
if (IsCmpSelMinMax)
- Builder.SetInsertPoint(GetCmpForMinMaxReduction(RdxRootInst));
- else
- Builder.SetInsertPoint(RdxRootInst);
+ InsertPt = GetCmpForMinMaxReduction(RdxRootInst);
+
+ // Vectorize a tree.
+ Value *VectorizedRoot =
+ V.vectorizeTree(LocalExternallyUsedValues, InsertPt);
+
+ Builder.SetInsertPoint(InsertPt);
// To prevent poison from leaking across what used to be sequential,
// safe, scalar boolean logic operations, the reduction operand must be
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/buildvector-reduce.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/buildvector-reduce.ll
new file mode 100644
index 0000000000000..32d84612d6618
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/buildvector-reduce.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=slp-vectorizer < %s -mtriple=arm64-apple-macosx | FileCheck %s
+
+define i8 @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[TMP1:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[CALL278:%.*]] = call i32 @fn(i32 [[SUM]])
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> poison, i32 [[CALL278]], i32 0
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP1]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[SHUFFLE]])
+; CHECK-NEXT: br label [[FOR_BODY]]
+;
+entry:
+ br label %for.body
+
+for.body:
+ %sum = phi i32 [ %add285.19, %for.body ], [ 0, %entry ]
+ %call278 = call i32 @fn(i32 %sum)
+ %add285.13 = add i32 %call278, %call278
+ %add285.14 = add i32 %add285.13, %call278
+ %add285.15 = add i32 %add285.14, %call278
+ %add285.16 = add i32 %add285.15, %call278
+ %add285.17 = add i32 %add285.16, %call278
+ %add285.18 = add i32 %add285.17, %call278
+ %add285.19 = add i32 %add285.18, %call278
+ br label %for.body
+}
+
+declare i32 @fn(i32)
More information about the llvm-commits
mailing list