[PATCH] D93615: [LV] Avoid needless fold tail

Gil Rapaport via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Sun Dec 20 23:29:02 PST 2020


gilr created this revision.
gilr added reviewers: fhahn, Ayal.
Herald added subscribers: javed.absar, hiraditya.
gilr requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

When the trip count is provably divisible by the maximal/chosen VF, folding the loop's tail during vectorization is redundant.
This commit extends the existing check, which only handles constant trip counts, to any trip count that SCEV can prove is divisible by the maximal/selected VF.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D93615

Files:
  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
  llvm/test/Transforms/LoopVectorize/avoid-needless-fold-tail.ll


Index: llvm/test/Transforms/LoopVectorize/avoid-needless-fold-tail.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/avoid-needless-fold-tail.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+; Make sure the loop is vectorized under -Os without folding its tail, based on
+; the trip-count's lower bits being zero.
+; CHECK: vector.body:
+; CHECK: store <4 x i32>
+
+define dso_local void @alignTC(i32* noalias nocapture %A, i32 %n) optsize {
+entry:
+  %alignedTC = and i32 %n, -8
+  br label %loop
+
+loop:
+  %riv = phi i32 [ 0, %entry ], [ %rivPlus1, %loop ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %riv
+  store i32 13, i32* %arrayidx, align 1
+  %rivPlus1 = add nuw nsw i32 %riv, 1
+  %cond = icmp eq i32 %rivPlus1, %alignedTC
+  br i1 %cond, label %exit, label %loop
+
+exit:
+  ret void
+}
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5508,6 +5508,19 @@
     return MaxVF;
   }
 
+  // Avoid tail folding if the trip count is known to be a multiple of any VF we
+  // chose.
+  ScalarEvolution *SE = PSE.getSE();
+  const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount();
+  const SCEV *ExitCount = SE->getAddExpr(
+      BackedgeTakenCount, SE->getOne(BackedgeTakenCount->getType()));
+  unsigned TCisMultipleOf = 1 << SE->GetMinTrailingZeros(ExitCount);
+  if (TCisMultipleOf % MaxVFtimesIC == 0) {
+    // Accept MaxVF if we do not have a tail.
+    LLVM_DEBUG(dbgs() << "LV: No tail will remain for any chosen VF.\n");
+    return MaxVF;
+  }
+
   // If we don't know the precise trip count, or if the trip count that we
   // found modulo the vectorization factor is not zero, try to fold the tail
   // by masking.


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D93615.313023.patch
Type: text/x-patch
Size: 2053 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20201221/e33e6b02/attachment.bin>


More information about the llvm-commits mailing list