[llvm] c90235f - [LV] Drop wrap flags for reductions using VP def-use chain.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Thu May 19 12:37:02 PDT 2022


Author: Florian Hahn
Date: 2022-05-19T20:36:46+01:00
New Revision: c90235f0ef0bcdfab5087da496ea44de652a7363

URL: https://github.com/llvm/llvm-project/commit/c90235f0ef0bcdfab5087da496ea44de652a7363
DIFF: https://github.com/llvm/llvm-project/commit/c90235f0ef0bcdfab5087da496ea44de652a7363.diff

LOG: [LV] Drop wrap flags for reductions using VP def-use chain.

Update clearReductionWrapFlags to use the VPlan def-use chain from the
reduction phi recipe to drop reduction wrap flags.

This addresses an existing FIXME and fixes a crash when instructions in
the reduction chain are not used and have been removed before VPlan
code generation.

Fixes #55540.

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0669f9ad4dfc9..5e13c6e1fd1f6 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -581,7 +581,7 @@ class InnerLoopVectorizer {
   void fixReduction(VPReductionPHIRecipe *Phi, VPTransformState &State);
 
   /// Clear NSW/NUW flags from reduction instructions if necessary.
-  void clearReductionWrapFlags(const RecurrenceDescriptor &RdxDesc,
+  void clearReductionWrapFlags(VPReductionPHIRecipe *PhiR,
                                VPTransformState &State);
 
   /// Fixup the LCSSA phi nodes in the unique exit block.  This simply
@@ -3884,7 +3884,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
   Type *VecTy = State.get(LoopExitInstDef, 0)->getType();
 
   // Wrap flags are in general invalid after vectorization, clear them.
-  clearReductionWrapFlags(RdxDesc, State);
+  clearReductionWrapFlags(PhiR, State);
 
   // Before each round, move the insertion point right between
   // the PHIs and the values we are going to write.
@@ -4060,34 +4060,35 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
   OrigPhi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
 }
 
-void InnerLoopVectorizer::clearReductionWrapFlags(const RecurrenceDescriptor &RdxDesc,
+void InnerLoopVectorizer::clearReductionWrapFlags(VPReductionPHIRecipe *PhiR,
                                                   VPTransformState &State) {
+  const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
   RecurKind RK = RdxDesc.getRecurrenceKind();
   if (RK != RecurKind::Add && RK != RecurKind::Mul)
     return;
 
-  Instruction *LoopExitInstr = RdxDesc.getLoopExitInstr();
-  assert(LoopExitInstr && "null loop exit instruction");
-  SmallVector<Instruction *, 8> Worklist;
-  SmallPtrSet<Instruction *, 8> Visited;
-  Worklist.push_back(LoopExitInstr);
-  Visited.insert(LoopExitInstr);
+  SmallVector<VPValue *, 8> Worklist;
+  SmallPtrSet<VPValue *, 8> Visited;
+  Worklist.push_back(PhiR);
+  Visited.insert(PhiR);
 
   while (!Worklist.empty()) {
-    Instruction *Cur = Worklist.pop_back_val();
-    if (isa<OverflowingBinaryOperator>(Cur))
-      for (unsigned Part = 0; Part < UF; ++Part) {
-        // FIXME: Should not rely on getVPValue at this point.
-        Value *V = State.get(State.Plan->getVPValue(Cur, true), Part);
-        cast<Instruction>(V)->dropPoisonGeneratingFlags();
+    VPValue *Cur = Worklist.pop_back_val();
+    for (unsigned Part = 0; Part < UF; ++Part) {
+      Value *V = State.get(Cur, Part);
+      if (!isa<OverflowingBinaryOperator>(V))
+        break;
+      cast<Instruction>(V)->dropPoisonGeneratingFlags();
       }
 
-    for (User *U : Cur->users()) {
-      Instruction *UI = cast<Instruction>(U);
-      if ((Cur != LoopExitInstr || OrigLoop->contains(UI->getParent())) &&
-          Visited.insert(UI).second)
-        Worklist.push_back(UI);
-    }
+      for (VPUser *U : Cur->users()) {
+        auto *UserRecipe = dyn_cast<VPRecipeBase>(U);
+        if (!UserRecipe)
+          continue;
+        for (VPValue *V : UserRecipe->definedValues())
+          if (Visited.insert(V).second)
+            Worklist.push_back(V);
+      }
   }
 }
 

diff  --git a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
index 862a3845b4a1b..9a8e57d396a5a 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
@@ -478,3 +478,38 @@ exit:
   store i32 %sum.lcssa, i32* %gep.dst.1, align 4
   ret void
 }
+
+; Test for PR55540.
+define void @test_drop_poison_generating_dead_recipe(i64* %dst) {
+; CHECK-LABEL: @test_drop_poison_generating_dead_recipe(
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %vector.ph ], [ [[TMP0:%.*]], %vector.body ]
+; CHECK-NEXT:    [[TMP0]] = add <4 x i64> [[VEC_PHI]], <i64 -31364, i64 -31364, i64 -31364, i64 -31364>
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 360
+; CHECK-NEXT:    br i1 [[TMP1]], label %middle.block, label %vector.body
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP0]])
+; CHECK-NEXT:    store i64 [[TMP2]], i64* [[DST:%.*]], align 8
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 363, 360
+; CHECK-NEXT:    br i1 [[CMP_N]], label %exit, label %scalar.ph
+; CHECK:       scalar.ph:
+;
+entry:
+  br label %body
+
+body:
+  %red = phi i64 [ 0, %entry ], [ %red.next, %body ]
+  %iv = phi i32 [ 2, %entry ], [ %iv.next, %body ]
+  %add.1 = add nuw i64 %red, -23523
+  store i64 %add.1, i64* %dst, align 8
+  %red.next = add nuw i64 %red, -31364
+  store i64 %red.next, i64* %dst, align 8
+  %iv.next = add nuw nsw i32 %iv, 1
+  %ec = icmp ugt i32 %iv, 363
+  br i1 %ec, label %exit, label %body
+
+exit:
+  ret void
+}


        


More information about the llvm-commits mailing list