[llvm] ccebf7a - [VPlan] Properly handle sinking of replicate regions.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Tue May 4 14:36:36 PDT 2021


Author: Florian Hahn
Date: 2021-05-04T22:36:01+01:00
New Revision: ccebf7a1096a887e29d1f765cf6b27878466d0f4

URL: https://github.com/llvm/llvm-project/commit/ccebf7a1096a887e29d1f765cf6b27878466d0f4
DIFF: https://github.com/llvm/llvm-project/commit/ccebf7a1096a887e29d1f765cf6b27878466d0f4.diff

LOG: [VPlan] Properly handle sinking of replicate regions.

This patch updates the code that sinks recipes required for first-order
recurrences to properly handle replicate-regions. At the moment, the
code would just move the replicate recipe out of its replicate-region,
producing an invalid VPlan.

When sinking a recipe in a replicate-region, we have to sink the whole
region. To do that, we first need to split the block at the target
recipe and move the region in between.

This patch also adds a splitAt helper to VPBasicBlock to split a
VPBasicBlock at a given iterator.

Fixes PR50009.

Reviewed By: Ayal

Differential Revision: https://reviews.llvm.org/D100751

Added: 
    llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll

Modified: 
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/lib/Transforms/Vectorize/VPlan.cpp
    llvm/lib/Transforms/Vectorize/VPlan.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index fb0daff53834b..b2740826718f4 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9094,6 +9094,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
   for (auto &Entry : SinkAfter) {
     VPRecipeBase *Sink = RecipeBuilder.getRecipe(Entry.first);
     VPRecipeBase *Target = RecipeBuilder.getRecipe(Entry.second);
+
     // If the target is in a replication region, make sure to move Sink to the
     // block after it, not into the replication region itself.
     if (auto *Region =
@@ -9106,7 +9107,37 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
         continue;
       }
     }
-    Sink->moveAfter(Target);
+
+    auto *SinkRegion =
+        dyn_cast_or_null<VPRegionBlock>(Sink->getParent()->getParent());
+    // Unless the sink source is in a replicate region, sink the recipe
+    // directly.
+    if (!SinkRegion || !SinkRegion->isReplicator()) {
+      Sink->moveAfter(Target);
+      continue;
+    }
+
+    // If the sink source is in a replicate region, we need to move the whole
+    // replicate region, which should only contain a single recipe in the main
+    // block.
+    assert(Sink->getParent()->size() == 1 &&
+           "parent must be a replicator with a single recipe");
+    auto *SplitBlock =
+        Target->getParent()->splitAt(std::next(Target->getIterator()));
+
+    auto *Pred = SinkRegion->getSinglePredecessor();
+    auto *Succ = SinkRegion->getSingleSuccessor();
+    VPBlockUtils::disconnectBlocks(Pred, SinkRegion);
+    VPBlockUtils::disconnectBlocks(SinkRegion, Succ);
+    VPBlockUtils::connectBlocks(Pred, Succ);
+
+    auto *SplitPred = SplitBlock->getSinglePredecessor();
+
+    VPBlockUtils::disconnectBlocks(SplitPred, SplitBlock);
+    VPBlockUtils::connectBlocks(SplitPred, SinkRegion);
+    VPBlockUtils::connectBlocks(SinkRegion, SplitBlock);
+    if (VPBB == SplitPred)
+      VPBB = SplitBlock;
   }
 
   // Interleave memory: for each Interleave Group we marked earlier as relevant

diff  --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 41a618bd5a9b3..f77a950aa476a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -400,6 +400,32 @@ void VPBasicBlock::dropAllReferences(VPValue *NewValue) {
   }
 }
 
+VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
+  assert(SplitAt->getParent() == this &&
+         "can only split at a position in the same block");
+
+  SmallVector<VPBlockBase *, 2> Succs(getSuccessors().begin(),
+                                      getSuccessors().end());
+  // First, disconnect the current block from its successors.
+  for (VPBlockBase *Succ : Succs)
+    VPBlockUtils::disconnectBlocks(this, Succ);
+
+  // Create new empty block after the block to split.
+  auto *SplitBlock = new VPBasicBlock(getName() + ".split");
+  VPBlockUtils::insertBlockAfter(SplitBlock, this);
+
+  // Add successors for block to split to new block.
+  for (VPBlockBase *Succ : Succs)
+    VPBlockUtils::connectBlocks(SplitBlock, Succ);
+
+  // Finally, move the recipes starting at SplitAt to new block.
+  for (VPRecipeBase &ToMove :
+       make_early_inc_range(make_range(SplitAt, this->end())))
+    ToMove.moveBefore(*SplitBlock, SplitBlock->end());
+
+  return SplitBlock;
+}
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {

diff  --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 26ec86ef72aa5..cf59a34235ab3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1531,6 +1531,11 @@ class VPBasicBlock : public VPBlockBase {
 
   void dropAllReferences(VPValue *NewValue) override;
 
+  /// Split current block at \p SplitAt by inserting a new block between the
+  /// current block and its successors and moving all recipes starting at
+  /// SplitAt to the new block. Returns the new block.
+  VPBasicBlock *splitAt(iterator SplitAt);
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   /// Print this VPBsicBlock to \p O, prefixing all lines with \p Indent. \p
   /// SlotTracker is used to print unnamed VPValue's using consequtive numbers.

diff  --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
new file mode 100644
index 0000000000000..3e5c7def5005a
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
@@ -0,0 +1,204 @@
+; REQUIRES: asserts
+; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -debug-only=loop-vectorize 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
+; Test cases for PR50009, which require sinking a replicate-region due to a
+; first-order recurrence.
+
+define void @sink_replicate_region_1(i32 %x, i8* %ptr) optsize {
+; CHECK-LABEL: sink_replicate_region_1
+; CHECK:      VPlan 'Initial VPlan for VF={2},UF>=1' {
+; CHECK-NEXT: loop:
+; CHECK-NEXT:   WIDEN-PHI %0 = phi 0, %conv
+; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
+; CHECK-NEXT:   EMIT vp<%3> = icmp ule ir<%iv> vp<%0>
+; CHECK-NEXT: Successor(s): loop.0
+
+; CHECK:      loop.0:
+; CHECK-NEXT:   REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv>
+; CHECK-NEXT: Successor(s): pred.load
+
+; CHECK:     <xVFxUF> pred.load: {
+; CHECK-NEXT:   pred.load.entry:
+; CHECK-NEXT:     BRANCH-ON-MASK vp<%3>
+; CHECK-NEXT:   Successor(s): pred.load.if, pred.load.continue
+; CHECK-NEXT:   CondBit: vp<%3> (loop)
+
+; CHECK:      pred.load.if:
+; CHECK-NEXT:     REPLICATE ir<%lv> = load ir<%gep> (S->V)
+; CHECK-NEXT:   Successor(s): pred.load.continue
+
+; CHECK:      pred.load.continue:
+; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<%6> = ir<%lv>
+; CHECK-NEXT:   No successors
+; CHECK-NEXT: }
+
+; CHECK:      loop.1:
+; CHECK-NEXT:   WIDEN ir<%conv> = sext vp<%6>
+; CHECK-NEXT: Successor(s): pred.srem
+
+; CHECK:      <xVFxUF> pred.srem: {
+; CHECK-NEXT:   pred.srem.entry:
+; CHECK-NEXT:     BRANCH-ON-MASK vp<%3>
+; CHECK-NEXT:   Successor(s): pred.srem.if, pred.srem.continue
+; CHECK-NEXT:   CondBit: vp<%3> (loop)
+
+; CHECK:      pred.srem.if:
+; CHECK-NEXT:     REPLICATE ir<%rem> = srem ir<%0>, ir<%x> (S->V)
+; CHECK-NEXT:   Successor(s): pred.srem.continue
+
+; CHECK:      pred.srem.continue:
+; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%rem>
+; CHECK-NEXT:   No successors
+; CHECK-NEXT: }
+
+; CHECK:      loop.1.split:
+; CHECK-NEXT:   WIDEN ir<%add> = add ir<%conv>, vp<%9>
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+;
+entry:
+  br label %loop
+
+loop:
+  %0 = phi i32 [ 0, %entry ], [ %conv, %loop ]
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %rem = srem i32 %0, %x
+  %gep = getelementptr i8, i8* %ptr, i32 %iv
+  %lv = load i8, i8* %gep
+  %conv = sext i8 %lv to i32
+  %add = add i32 %conv, %rem
+  %iv.next = add nsw i32 %iv, 1
+  %ec = icmp eq i32 %iv.next, 20001
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define void @sink_replicate_region_2(i32 %x, i8 %y, i32* %ptr) optsize {
+; CHECK-LABEL: sink_replicate_region_2
+; CHECK:      VPlan 'Initial VPlan for VF={2},UF>=1' {
+; CHECK-NEXT: loop:
+; CHECK-NEXT:   WIDEN-PHI %recur = phi 0, %recur.next
+; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
+; CHECK-NEXT:   EMIT vp<%3> = icmp ule ir<%iv> vp<%0>
+; CHECK-NEXT: Successor(s): loop.0
+
+; CHECK:      loop.0:
+; CHECK-NEXT:   WIDEN ir<%recur.next> = sext ir<%y>
+; CHECK-NEXT: Successor(s): pred.srem
+
+; CHECK:      <xVFxUF> pred.srem: {
+; CHECK-NEXT:   pred.srem.entry:
+; CHECK-NEXT:     BRANCH-ON-MASK vp<%3>
+; CHECK-NEXT:   Successor(s): pred.srem.if, pred.srem.continue
+; CHECK-NEXT:   CondBit: vp<%3> (loop)
+
+; CHECK:       pred.srem.if:
+; CHECK-NEXT:     REPLICATE ir<%rem> = srem ir<%recur>, ir<%x>
+; CHECK-NEXT:   Successor(s): pred.srem.continue
+
+; CHECK:      pred.srem.continue:
+; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<%6> = ir<%rem>
+; CHECK-NEXT:   No successors
+; CHECK-NEXT: }
+
+; CHECK:     loop.0.split:
+; CHECK-NEXT:   REPLICATE ir<%add> = add vp<%6>, ir<%recur.next>
+; CHECK-NEXT:   REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv>
+; CHECK-NEXT: Successor(s): pred.store
+
+; CHECK:      <xVFxUF> pred.store: {
+; CHECK-NEXT:   pred.store.entry:
+; CHECK-NEXT:     BRANCH-ON-MASK vp<%3>
+; CHECK-NEXT:   Successor(s): pred.store.if, pred.store.continue
+; CHECK-NEXT:   CondBit: vp<%3> (loop)
+
+; CHECK:      pred.store.if:
+; CHECK-NEXT:     REPLICATE store ir<%add>, ir<%gep>
+; CHECK-NEXT:   Successor(s): pred.store.continue
+
+; CHECK:     pred.store.continue:
+; CHECK-NEXT:   No successors
+; CHECK-NEXT: }
+
+; CHECK:      loop.1:
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+;
+entry:
+  br label %loop
+
+loop:
+  %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ]
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %rem = srem i32 %recur, %x
+  %recur.next = sext i8 %y to i32
+  %add = add i32 %rem, %recur.next
+  %gep = getelementptr i32, i32* %ptr, i32 %iv
+  store i32 %add, i32* %gep
+  %iv.next = add nsw i32 %iv, 1
+  %ec = icmp eq i32 %iv.next, 20001
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, i32* %ptr) optsize {
+; CHECK-LABEL: sink_replicate_region_3_reduction
+; CHECK:      VPlan 'Initial VPlan for VF={2},UF>=1' {
+; CHECK-NEXT: loop:
+; CHECK-NEXT:   WIDEN-PHI %recur = phi 0, %recur.next
+; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
+; CHECK-NEXT:   WIDEN-PHI ir<%and.red> = phi ir<1234>, ir<%and.red.next>
+; CHECK-NEXT:   EMIT vp<%4> = icmp ule ir<%iv> vp<%0>
+; CHECK-NEXT: Successor(s): loop.0
+
+; CHECK:      loop.0:
+; CHECK-NEXT:   WIDEN ir<%recur.next> = sext ir<%y>
+; CHECK-NEXT: Successor(s): pred.srem
+
+; CHECK:      <xVFxUF> pred.srem: {
+; CHECK-NEXT:   pred.srem.entry:
+; CHECK-NEXT:     BRANCH-ON-MASK vp<%4>
+; CHECK-NEXT:   Successor(s): pred.srem.if, pred.srem.continue
+; CHECK-NEXT:   CondBit: vp<%4> (loop)
+
+; CHECK:       pred.srem.if:
+; CHECK-NEXT:     REPLICATE ir<%rem> = srem ir<%recur>, ir<%x> (S->V)
+; CHECK-NEXT:   Successor(s): pred.srem.continue
+
+; CHECK:      pred.srem.continue:
+; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<%7> = ir<%rem>
+; CHECK-NEXT:   No successors
+; CHECK-NEXT: }
+
+; CHECK:     loop.0.split:
+; CHECK-NEXT:   WIDEN ir<%add> = add vp<%7>, ir<%recur.next>
+; CHECK-NEXT:   WIDEN ir<%and.red.next> = and ir<%and.red>, ir<%add>
+; CHECK-NEXT:   EMIT vp<%10> = select vp<%4> ir<%and.red.next> ir<%and.red>
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+;
+entry:
+  br label %loop
+
+loop:
+  %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ]
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %and.red = phi i32 [ 1234, %entry ], [ %and.red.next, %loop ]
+  %rem = srem i32 %recur, %x
+  %recur.next = sext i8 %y to i32
+  %add = add i32 %rem, %recur.next
+  %and.red.next = and i32 %and.red, %add
+  %iv.next = add nsw i32 %iv, 1
+  %ec = icmp eq i32 %iv.next, 20001
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = phi i32 [ %and.red.next, %loop ]
+  ret i32 %res
+}


        


More information about the llvm-commits mailing list