[PATCH] D93615: [LV] Avoid needless fold tail
Gil Rapaport via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 21 05:42:01 PST 2020
gilr updated this revision to Diff 313091.
gilr added a comment.
Add a test for a constant TC with IC=3.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D93615/new/
https://reviews.llvm.org/D93615
Files:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll
llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
Index: llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+; Make sure loops are vectorized under -Os without folding their tails when
+; the lower bits of their trip count are known to be zero.
+
+; CHECK-LABEL: alignTC
+; CHECK: vector.body:
+; CHECK: store <4 x i32>
+
+define dso_local void @alignTC(i32* noalias nocapture %A, i32 %n) optsize { ; loop bound is %n with its low three bits cleared
+entry:
+ %alignedTC = and i32 %n, -8 ; -8 masks off the low three bits, so %alignedTC is a multiple of 8
+ br label %loop
+
+loop:
+ %riv = phi i32 [ 0, %entry ], [ %rivPlus1, %loop ] ; counter: 0 from %entry, %rivPlus1 from the back edge
+ %arrayidx = getelementptr inbounds i32, i32* %A, i32 %riv
+ store i32 13, i32* %arrayidx, align 1 ; writes the constant 13 to A[%riv]
+ %rivPlus1 = add nuw nsw i32 %riv, 1
+ %cond = icmp eq i32 %rivPlus1, %alignedTC ; true once the incremented counter equals %alignedTC
+ br i1 %cond, label %exit, label %loop ; leave the loop on equality, otherwise iterate again
+
+exit:
+ ret void
+}
Index: llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=3 -force-vector-width=2 -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+; Make sure the loop is unrolled under -Os without folding its tail when its
+; trip count is provably divisible by the chosen VFxIC.
+
+; CHECK-LABEL: constTC
+; CHECK: vector.body:
+; CHECK-COUNT-3: store <2 x i32>
+; CHECK: br i1
+
+define dso_local void @constTC(i32* noalias nocapture %A) optsize { ; loop with a compile-time constant bound of 1800
+entry:
+ br label %loop
+
+loop:
+ %riv = phi i32 [ 0, %entry ], [ %rivPlus1, %loop ] ; counter: 0 from %entry, %rivPlus1 from the back edge
+ %arrayidx = getelementptr inbounds i32, i32* %A, i32 %riv
+ store i32 13, i32* %arrayidx, align 1 ; writes the constant 13 to A[%riv]
+ %rivPlus1 = add nuw nsw i32 %riv, 1
+ %cond = icmp eq i32 %rivPlus1, 1800 ; body runs 1800 times; 1800 is divisible by 2 x 3 = 6 (width and interleave forced on the RUN line)
+ br i1 %cond, label %exit, label %loop ; leave the loop on equality, otherwise iterate again
+
+exit:
+ ret void
+}
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5508,6 +5508,19 @@
return MaxVF;
}
+ // Avoid tail folding if the trip count is known to be a multiple of any VF we
+ // chose.
+ ScalarEvolution *SE = PSE.getSE();
+ const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount();
+ const SCEV *ExitCount = SE->getAddExpr(
+ BackedgeTakenCount, SE->getOne(BackedgeTakenCount->getType()));
+ unsigned TCisMultipleOf = 1 << SE->GetMinTrailingZeros(ExitCount);
+ if (TCisMultipleOf % MaxVFtimesIC == 0) {
+ // Accept MaxVF if we do not have a tail.
+ LLVM_DEBUG(dbgs() << "LV: No tail will remain for any chosen VF.\n");
+ return MaxVF;
+ }
+
// If we don't know the precise trip count, or if the trip count that we
// found modulo the vectorization factor is not zero, try to fold the tail
// by masking.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D93615.313091.patch
Type: text/x-patch
Size: 3208 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20201221/d7516794/attachment.bin>
More information about the llvm-commits
mailing list