[llvm] c90235f - [LV] Drop wrap flags for reductions using VP def-use chain.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Thu May 19 12:37:02 PDT 2022
Author: Florian Hahn
Date: 2022-05-19T20:36:46+01:00
New Revision: c90235f0ef0bcdfab5087da496ea44de652a7363
URL: https://github.com/llvm/llvm-project/commit/c90235f0ef0bcdfab5087da496ea44de652a7363
DIFF: https://github.com/llvm/llvm-project/commit/c90235f0ef0bcdfab5087da496ea44de652a7363.diff
LOG: [LV] Drop wrap flags for reductions using VP def-use chain.
Update clearReductionWrapFlags to use the VPlan def-use chain from the
reduction phi recipe to drop reduction wrap flags.
This addresses an existing FIXME and fixes a crash when instructions in
the reduction chain are not used and have been removed before VPlan
code generation.
Fixes #55540.
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0669f9ad4dfc9..5e13c6e1fd1f6 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -581,7 +581,7 @@ class InnerLoopVectorizer {
void fixReduction(VPReductionPHIRecipe *Phi, VPTransformState &State);
/// Clear NSW/NUW flags from reduction instructions if necessary.
- void clearReductionWrapFlags(const RecurrenceDescriptor &RdxDesc,
+ void clearReductionWrapFlags(VPReductionPHIRecipe *PhiR,
VPTransformState &State);
/// Fixup the LCSSA phi nodes in the unique exit block. This simply
@@ -3884,7 +3884,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
Type *VecTy = State.get(LoopExitInstDef, 0)->getType();
// Wrap flags are in general invalid after vectorization, clear them.
- clearReductionWrapFlags(RdxDesc, State);
+ clearReductionWrapFlags(PhiR, State);
// Before each round, move the insertion point right between
// the PHIs and the values we are going to write.
@@ -4060,34 +4060,35 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
OrigPhi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
}
-void InnerLoopVectorizer::clearReductionWrapFlags(const RecurrenceDescriptor &RdxDesc,
+void InnerLoopVectorizer::clearReductionWrapFlags(VPReductionPHIRecipe *PhiR,
VPTransformState &State) {
+ const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
RecurKind RK = RdxDesc.getRecurrenceKind();
if (RK != RecurKind::Add && RK != RecurKind::Mul)
return;
- Instruction *LoopExitInstr = RdxDesc.getLoopExitInstr();
- assert(LoopExitInstr && "null loop exit instruction");
- SmallVector<Instruction *, 8> Worklist;
- SmallPtrSet<Instruction *, 8> Visited;
- Worklist.push_back(LoopExitInstr);
- Visited.insert(LoopExitInstr);
+ SmallVector<VPValue *, 8> Worklist;
+ SmallPtrSet<VPValue *, 8> Visited;
+ Worklist.push_back(PhiR);
+ Visited.insert(PhiR);
while (!Worklist.empty()) {
- Instruction *Cur = Worklist.pop_back_val();
- if (isa<OverflowingBinaryOperator>(Cur))
- for (unsigned Part = 0; Part < UF; ++Part) {
- // FIXME: Should not rely on getVPValue at this point.
- Value *V = State.get(State.Plan->getVPValue(Cur, true), Part);
- cast<Instruction>(V)->dropPoisonGeneratingFlags();
+ VPValue *Cur = Worklist.pop_back_val();
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *V = State.get(Cur, Part);
+ if (!isa<OverflowingBinaryOperator>(V))
+ break;
+ cast<Instruction>(V)->dropPoisonGeneratingFlags();
}
- for (User *U : Cur->users()) {
- Instruction *UI = cast<Instruction>(U);
- if ((Cur != LoopExitInstr || OrigLoop->contains(UI->getParent())) &&
- Visited.insert(UI).second)
- Worklist.push_back(UI);
- }
+ for (VPUser *U : Cur->users()) {
+ auto *UserRecipe = dyn_cast<VPRecipeBase>(U);
+ if (!UserRecipe)
+ continue;
+ for (VPValue *V : UserRecipe->definedValues())
+ if (Visited.insert(V).second)
+ Worklist.push_back(V);
+ }
}
}
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
index 862a3845b4a1b..9a8e57d396a5a 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
@@ -478,3 +478,38 @@ exit:
store i32 %sum.lcssa, i32* %gep.dst.1, align 4
ret void
}
+
+; Test for PR55540.
+define void @test_drop_poison_generating_dead_recipe(i64* %dst) {
+; CHECK-LABEL: @test_drop_poison_generating_dead_recipe(
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %vector.ph ], [ [[TMP0:%.*]], %vector.body ]
+; CHECK-NEXT: [[TMP0]] = add <4 x i64> [[VEC_PHI]], <i64 -31364, i64 -31364, i64 -31364, i64 -31364>
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 360
+; CHECK-NEXT: br i1 [[TMP1]], label %middle.block, label %vector.body
+; CHECK: middle.block:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP0]])
+; CHECK-NEXT: store i64 [[TMP2]], i64* [[DST:%.*]], align 8
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 363, 360
+; CHECK-NEXT: br i1 [[CMP_N]], label %exit, label %scalar.ph
+; CHECK: scalar.ph:
+;
+entry:
+ br label %body
+
+body:
+ %red = phi i64 [ 0, %entry ], [ %red.next, %body ]
+ %iv = phi i32 [ 2, %entry ], [ %iv.next, %body ]
+ %add.1 = add nuw i64 %red, -23523
+ store i64 %add.1, i64* %dst, align 8
+ %red.next = add nuw i64 %red, -31364
+ store i64 %red.next, i64* %dst, align 8
+ %iv.next = add nuw nsw i32 %iv, 1
+ %ec = icmp ugt i32 %iv, 363
+ br i1 %ec, label %exit, label %body
+
+exit:
+ ret void
+}
More information about the llvm-commits
mailing list