[PATCH] D157631: [LoopVectorize] Fix incorrect order of invariant stores when there are multiple reductions.
Igor Kirillov via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 29 07:23:55 PDT 2023
igor.kirillov updated this revision to Diff 554325.
igor.kirillov added a comment.
Added tests with different store pointers
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D157631/new/
https://reviews.llvm.org/D157631
Files:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
Index: llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
+++ llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
@@ -558,14 +558,13 @@
}
; Make sure that if there are several reductions in the loop, the order of invariant stores sank outside of the loop is preserved
-; FIXME: This tests currently shows incorrect behavior and it will fixed in the following patch
; See https://github.com/llvm/llvm-project/issues/64047
define void @reduc_add_mul_store_same_ptr(ptr %dst, ptr readonly %src) {
; CHECK-LABEL: define void @reduc_add_mul_store_same_ptr
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP1:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1:%.*]])
; CHECK-NEXT: store i32 [[TMP2]], ptr %dst, align 4
-; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3:%.*]])
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP3:%.*]])
; CHECK-NEXT: store i32 [[TMP4]], ptr %dst, align 4
;
entry:
@@ -619,14 +618,13 @@
}
; Same as above but storing is done to two different pointers and they can be aliased
-; FIXME: This tests currently shows incorrect behavior and it will fixed in the following patch
define void @reduc_add_mul_store_different_ptr(ptr %dst1, ptr %dst2, ptr readonly %src) {
; CHECK-LABEL: define void @reduc_add_mul_store_different_ptr
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP1:%.*]])
-; CHECK-NEXT: store i32 [[TMP2]], ptr %dst2, align 4
-; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3:%.*]])
-; CHECK-NEXT: store i32 [[TMP4]], ptr %dst1, align 4
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1:%.*]])
+; CHECK-NEXT: store i32 [[TMP2]], ptr %dst1, align 4
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP3:%.*]])
+; CHECK-NEXT: store i32 [[TMP4]], ptr %dst2, align 4
;
entry:
br label %for.body
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3617,10 +3617,44 @@
// the incoming edges.
VPBasicBlock *Header =
State.Plan->getVectorLoopRegion()->getEntryBasicBlock();
+
+ // Gather all VPReductionPHIRecipe and sort them so that Intermediate stores
+ // sank outside of the loop would keep the same order as they had in the
+ // original loop.
+ SmallVector<VPReductionPHIRecipe *> ReductionPHIList;
for (VPRecipeBase &R : Header->phis()) {
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R))
- fixReduction(ReductionPhi, State);
- else if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R))
+ ReductionPHIList.emplace_back(ReductionPhi);
+ }
+ stable_sort(ReductionPHIList, [this](const VPReductionPHIRecipe *R1,
+ const VPReductionPHIRecipe *R2) {
+ auto *IS1 = R1->getRecurrenceDescriptor().IntermediateStore;
+ auto *IS2 = R2->getRecurrenceDescriptor().IntermediateStore;
+
+ // If neither of the recipes has an intermediate store, keep the order the
+ // same.
+ if (!IS1 && !IS2)
+ return false;
+
+ // If only one of the recipes has an intermediate store, then move it
+ // towards the beginning of the list.
+ if (IS1 && !IS2)
+ return true;
+
+ if (!IS1 && IS2)
+ return false;
+
+ // If both recipes have an intermediate store, then the recipe with the
+ // later store should be processed earlier. So it should go to the beginning
+ // of the list.
+ return DT->dominates(IS2, IS1);
+ });
+
+ for (VPReductionPHIRecipe *ReductionPhi : ReductionPHIList)
+ fixReduction(ReductionPhi, State);
+
+ for (VPRecipeBase &R : Header->phis()) {
+ if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R))
fixFixedOrderRecurrence(FOR, State);
}
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D157631.554325.patch
Type: text/x-patch
Size: 4318 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230829/4a50f328/attachment.bin>
More information about the llvm-commits
mailing list