[llvm] [LoopVectorize] Use new getUniqueLatchExitBlock routine (PR #108231)

Wed Sep 11 07:09:23 PDT 2024

https://github.com/david-arm created https://github.com/llvm/llvm-project/pull/108231

With PR #88385 I am introducing support for vectorising more loops with early exits that don't require a scalar epilogue. As such, if a loop doesn't have a unique exit block it will not automatically imply we require a scalar epilogue. Also, in all places in the code today where we use the variable LoopExitBlock we actually mean the exit block from the latch. Therefore, it seemed reasonable to add a new
getUniqueLatchExitBlock that allows the caller to determine the exit block taken from the latch and use this instead of getUniqueExitBlock. I also renamed LoopExitBlock to be LatchExitBlock. I feel this not only better reflects how the variable is used today, but also prepares the code for PR #88385.

While doing this I also noticed that one of the comments in requiresScalarEpilogue is wrong when we require a scalar epilogue, i.e. when we're not exiting from the latch block. This doesn't always imply we have multiple exits, e.g. see the test in

Transforms/LoopVectorize/unroll_nonlatch.ll

where the latch unconditionally branches back to the only exiting block.

>From 36c23112b9c0952d412ed343192093bac73690c7 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Wed, 11 Sep 2024 14:04:18 +0000
Subject: [PATCH] [LoopVectorize] Use new getUniqueLatchExitBlock routine

With PR #88385 I am introducing support for vectorising more
loops with early exits that don't require a scalar epilogue.
As such, if a loop doesn't have a unique exit block it will
not automatically imply we require a scalar epilogue. Also,
in all places in the code today where we use the variable
LoopExitBlock we actually mean the exit block from the latch.
Therefore, it seemed reasonable to add a new
getUniqueLatchExitBlock that allows the caller to determine
the exit block taken from the latch and use this instead of
getUniqueExitBlock. I also renamed LoopExitBlock to be
LatchExitBlock. I feel this not only better reflects how the
variable is used today, but also prepares the code for
PR #88385.

While doing this I also noticed that one of the comments in
requiresScalarEpilogue is wrong when we require a scalar
epilogue, i.e. when we're not exiting from the latch block.
This doesn't always imply we have multiple exits, e.g. see
the test in

Transforms/LoopVectorize/unroll_nonlatch.ll

where the latch unconditionally branches back to the only
exiting block.
---
 llvm/include/llvm/Support/GenericLoopInfo.h   |  4 +++
 .../llvm/Support/GenericLoopInfoImpl.h        | 10 +++++++
 .../Transforms/Vectorize/LoopVectorize.cpp    | 26 +++++++++----------
 3 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/llvm/include/llvm/Support/GenericLoopInfo.h b/llvm/include/llvm/Support/GenericLoopInfo.h
index d560ca648132c9..0fa13e2a3d0e15 100644
--- a/llvm/include/llvm/Support/GenericLoopInfo.h
+++ b/llvm/include/llvm/Support/GenericLoopInfo.h
@@ -294,6 +294,10 @@ template <class BlockT, class LoopT> class LoopBase {
   /// Otherwise return null.
   BlockT *getUniqueExitBlock() const;
 
+  /// Return the unique exit block for the latch, or null if there are multiple
+  /// different exit blocks.
+  BlockT *getUniqueLatchExitBlock() const;
+
   /// Return true if this loop does not have any exit blocks.
   bool hasNoExitBlocks() const;
 
diff --git a/llvm/include/llvm/Support/GenericLoopInfoImpl.h b/llvm/include/llvm/Support/GenericLoopInfoImpl.h
index d19022729ace32..4945ea30950d23 100644
--- a/llvm/include/llvm/Support/GenericLoopInfoImpl.h
+++ b/llvm/include/llvm/Support/GenericLoopInfoImpl.h
@@ -159,6 +159,16 @@ BlockT *LoopBase<BlockT, LoopT>::getUniqueExitBlock() const {
   return getExitBlockHelper(this, true).first;
 }
 
+template <class BlockT, class LoopT>
+BlockT *LoopBase<BlockT, LoopT>::getUniqueLatchExitBlock() const {
+  const BlockT *Latch = getLoopLatch();
+  assert(Latch && "Latch block must exists");
+  SmallVector<BlockT *, 4> ExitBlocks;
+  getUniqueExitBlocksHelper(this, ExitBlocks,
+                            [Latch](const BlockT *BB) { return BB == Latch; });
+  return ExitBlocks.size() == 1 ? ExitBlocks[0] : nullptr;
+}
+
 /// getExitEdges - Return all pairs of (_inside_block_,_outside_block_).
 template <class BlockT, class LoopT>
 void LoopBase<BlockT, LoopT>::getExitEdges(
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index a8722db654f5c9..04a350f1f8d9cb 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -652,9 +652,8 @@ class InnerLoopVectorizer {
   /// Middle Block between the vector and the scalar.
   BasicBlock *LoopMiddleBlock;
 
-  /// The unique ExitBlock of the scalar loop if one exists.  Note that
-  /// there can be multiple exiting edges reaching this block.
-  BasicBlock *LoopExitBlock;
+  /// The exit block from the loop latch, if one exists.
+  BasicBlock *LatchExitBlock;
 
   /// The scalar loop body.
   BasicBlock *LoopScalarBody;
@@ -1390,6 +1389,8 @@ class LoopVectorizationCostModel {
 
   /// Returns true if we're required to use a scalar epilogue for at least
   /// the final iteration of the original loop.
+  /// TODO: It would be good to cache the result and avoid recalculating,
+  /// which will also avoid reprinting the same debug message many times.
   bool requiresScalarEpilogue(bool IsVectorizing) const {
     if (!isScalarEpilogueAllowed()) {
       LLVM_DEBUG(dbgs() << "LV: Loop does not require scalar epilogue\n");
@@ -1398,8 +1399,8 @@ class LoopVectorizationCostModel {
     // If we might exit from anywhere but the latch, must run the exiting
     // iteration in scalar form.
     if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
-      LLVM_DEBUG(
-          dbgs() << "LV: Loop requires scalar epilogue: multiple exits\n");
+      LLVM_DEBUG(dbgs() << "LV: Loop requires scalar epilogue: not exiting "
+                           "from latch block\n");
       return true;
     }
     if (IsVectorizing && InterleaveInfo.requiresScalarEpilogue()) {
@@ -2043,8 +2044,7 @@ class GeneratedRTChecks {
   /// adjusts the branches to branch to the vector preheader or \p Bypass,
   /// depending on the generated condition.
   BasicBlock *emitSCEVChecks(BasicBlock *Bypass,
-                             BasicBlock *LoopVectorPreHeader,
-                             BasicBlock *LoopExitBlock) {
+                             BasicBlock *LoopVectorPreHeader) {
     if (!SCEVCheckCond)
       return nullptr;
 
@@ -2516,7 +2516,7 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
 
 BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
   BasicBlock *const SCEVCheckBlock =
-      RTChecks.emitSCEVChecks(Bypass, LoopVectorPreHeader, LoopExitBlock);
+      RTChecks.emitSCEVChecks(Bypass, LoopVectorPreHeader);
   if (!SCEVCheckBlock)
     return nullptr;
 
@@ -2533,7 +2533,7 @@ BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
       // If there is an epilogue which must run, there's no edge from the
       // middle block to exit blocks  and thus no need to update the immediate
       // dominator of the exit blocks.
-      DT->changeImmediateDominator(LoopExitBlock, SCEVCheckBlock);
+      DT->changeImmediateDominator(LatchExitBlock, SCEVCheckBlock);
   }
 
   LoopBypassBlocks.push_back(SCEVCheckBlock);
@@ -2581,9 +2581,9 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
   LoopScalarBody = OrigLoop->getHeader();
   LoopVectorPreHeader = OrigLoop->getLoopPreheader();
   assert(LoopVectorPreHeader && "Invalid loop structure");
-  LoopExitBlock = OrigLoop->getUniqueExitBlock(); // may be nullptr
-  assert((LoopExitBlock || Cost->requiresScalarEpilogue(VF.isVector())) &&
-         "multiple exit loop without required epilogue?");
+  LatchExitBlock = OrigLoop->getUniqueLatchExitBlock();
+  assert((LatchExitBlock || Cost->requiresScalarEpilogue(VF.isVector())) &&
+         "Expected loop without exit from latch to require an epilogue");
 
   LoopMiddleBlock =
       SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
@@ -7788,7 +7788,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
     // If there is an epilogue which must run, there's no edge from the
     // middle block to exit blocks  and thus no need to update the immediate
     // dominator of the exit blocks.
-    DT->changeImmediateDominator(LoopExitBlock,
+    DT->changeImmediateDominator(LatchExitBlock,
                                  EPI.EpilogueIterationCountCheck);
 
   // Keep track of bypass blocks, as they feed start values to the induction and