[PATCH] D102438: [LV] Remove unnecessary state variable FoldTailByMasking

Sander de Smalen via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu May 13 13:50:42 PDT 2021


sdesmalen created this revision.
Herald added a subscriber: hiraditya.
sdesmalen requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

FoldTailByMasking is a piece of state that is maintained separately
from ScalarEpilogueStatus. It is better to derive it from
ScalarEpilogueStatus itself: by adding a new state,
`CM_RequiresNoEpilogue`, foldTailByMasking() can be computed directly
from the status, so the separate flag no longer needs to be maintained.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D102438

Files:
  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp


Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1200,7 +1200,11 @@
   CM_ScalarEpilogueNotNeededUsePredicate,
 
   // Directive indicating we must either tail fold or not vectorize
-  CM_ScalarEpilogueNotAllowedUsePredicate
+  CM_ScalarEpilogueNotAllowedUsePredicate,
+
+  // The epilogue can be dropped entirely because the VF handles all
+  // iterations.
+  CM_RequiresNoEpilogue
 };
 
 /// LoopVectorizationCostModel - estimates the expected speedups due to
@@ -1570,10 +1574,19 @@
   }
 
   /// Returns true if all loop blocks should be masked to fold tail loop.
-  bool foldTailByMasking() const { return FoldTailByMasking; }
+  bool foldTailByMasking() const {
+    switch (ScalarEpilogueStatus) {
+    case CM_RequiresNoEpilogue:
+    case CM_ScalarEpilogueAllowed:
+      return false;
+    case CM_ScalarEpilogueNotAllowedOptSize:
+    case CM_ScalarEpilogueNotAllowedLowTripLoop:
+    case CM_ScalarEpilogueNotNeededUsePredicate:
+    case CM_ScalarEpilogueNotAllowedUsePredicate:
+      return true;
+    }
 
-  bool blockNeedsPredication(BasicBlock *BB) const {
-    return foldTailByMasking() || Legal->blockNeedsPredication(BB);
+    llvm_unreachable("Unexpected status");
   }
 
   /// A SmallMapVector to store the InLoop reduction op chains, mapping phi
@@ -1731,9 +1744,6 @@
   /// iterations to execute in the scalar loop.
   ScalarEpilogueLowering ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
 
-  /// All blocks of loop are to be masked to fold tail of scalar iterations.
-  bool FoldTailByMasking = false;
-
   /// A map holding scalar costs for different vectorization factors. The
   /// presence of a cost for an instruction in the mapping indicates that the
   /// instruction will be scalarized when vectorizing with the associated
@@ -5734,6 +5744,8 @@
   }
 
   switch (ScalarEpilogueStatus) {
+  case CM_RequiresNoEpilogue:
+    llvm_unreachable("No VF has been chosen to determine this");
   case CM_ScalarEpilogueAllowed:
     return computeFeasibleMaxVF(TC, UserVF);
   case CM_ScalarEpilogueNotAllowedUsePredicate:
@@ -5809,6 +5821,7 @@
   if (Rem->isZero()) {
     // Accept MaxVF if we do not have a tail.
     LLVM_DEBUG(dbgs() << "LV: No tail will remain for any chosen VF.\n");
+    ScalarEpilogueStatus = CM_RequiresNoEpilogue;
     return MaxVF;
   }
 
@@ -5816,10 +5829,8 @@
   // found modulo the vectorization factor is not zero, try to fold the tail
   // by masking.
   // FIXME: look for a smaller MaxVF that does divide TC rather than masking.
-  if (Legal->prepareToFoldTailByMasking()) {
-    FoldTailByMasking = true;
+  if (Legal->prepareToFoldTailByMasking())
     return MaxVF;
-  }
 
   // If there was a tail-folding hint/switch, but we can't fold the tail by
   // masking, fallback to a vectorization with a scalar epilogue.
@@ -5945,7 +5956,7 @@
 
   unsigned MaxTripCount = PSE.getSE()->getSmallConstantMaxTripCount(TheLoop);
 
-  if (!A.Width.isScalable() && !B.Width.isScalable() && FoldTailByMasking &&
+  if (!A.Width.isScalable() && !B.Width.isScalable() && foldTailByMasking() &&
       MaxTripCount) {
     // If we are folding the tail and the trip count is a known (possibly small)
     // constant, the trip count will be rounded up to an integer number of


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D102438.345274.patch
Type: text/x-patch
Size: 3415 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210513/ecc9f0cf/attachment.bin>


More information about the llvm-commits mailing list