[PATCH] D66108: [LV] fold-tail flag
Dorit Nuzman via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 13 22:21:25 PDT 2019
This revision was automatically updated to reflect the committed changes.
Closed by commit rL368801: [LV] Fold-tail flag (authored by dorit).
Changed prior to commit:
https://reviews.llvm.org/D66108?vs=214694&id=215026#toc
Repository:
rL LLVM
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D66108/new/
https://reviews.llvm.org/D66108
Files:
llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/trunk/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll
Index: llvm/trunk/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll
===================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll
+++ llvm/trunk/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -loop-vectorize -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -prefer-predicate-over-epilog -S | FileCheck -check-prefix=PREDFLAG %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@@ -13,7 +14,15 @@
; CHECK: %index.next = add i64 %index, 8
; CHECK: %12 = icmp eq i64 %index.next, 432
; CHECK: br i1 %12, label %middle.block, label %vector.body, !llvm.loop !0
-
+; PREDFLAG-LABEL: tail_folding_enabled(
+; PREDFLAG: vector.body:
+; PREDFLAG: %wide.masked.load = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(
+; PREDFLAG: %wide.masked.load1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(
+; PREDFLAG: %8 = add nsw <8 x i32> %wide.masked.load1, %wide.masked.load
+; PREDFLAG: call void @llvm.masked.store.v8i32.p0v8i32(
+; PREDFLAG: %index.next = add i64 %index, 8
+; PREDFLAG: %12 = icmp eq i64 %index.next, 432
+; PREDFLAG: br i1 %12, label %middle.block, label %vector.body, !llvm.loop !0
entry:
br label %for.body
@@ -40,6 +49,15 @@
; CHECK-NOT: @llvm.masked.load.v8i32.p0v8i32(
; CHECK-NOT: @llvm.masked.store.v8i32.p0v8i32(
; CHECK: br i1 %44, label {{.*}}, label %vector.body
+; PREDFLAG-LABEL: tail_folding_disabled(
+; PREDFLAG: vector.body:
+; PREDFLAG: %wide.masked.load = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(
+; PREDFLAG: %wide.masked.load1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(
+; PREDFLAG: %8 = add nsw <8 x i32> %wide.masked.load1, %wide.masked.load
+; PREDFLAG: call void @llvm.masked.store.v8i32.p0v8i32(
+; PREDFLAG: %index.next = add i64 %index, 8
+; PREDFLAG: %12 = icmp eq i64 %index.next, 432
+; PREDFLAG: br i1 %12, label %middle.block, label %vector.body, !llvm.loop !4
entry:
br label %for.body
Index: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -177,6 +177,14 @@
"value are vectorized only if no scalar iteration overheads "
"are incurred."));
+// Indicates that an epilogue is undesired, predication is preferred.
+// This means that the vectorizer will try to fold the loop-tail (epilogue)
+// into the loop and predicate the loop body accordingly.
+static cl::opt<bool> PreferPredicateOverEpilog(
+ "prefer-predicate-over-epilog", cl::init(false), cl::Hidden,
+ cl::desc("Indicate that an epilogue is undesired, predication should be "
+ "used instead."));
+
static cl::opt<bool> MaximizeBandwidth(
"vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden,
cl::desc("Maximize bandwidth when selecting vectorization factor which "
@@ -906,7 +914,7 @@
CM_ScalarEpilogueNotAllowedLowTripLoop,
// Loop hint predicate indicating an epilogue is undesired.
- CM_ScalarEpilogueNotNeededPredicatePragma
+ CM_ScalarEpilogueNotNeededUsePredicate
};
/// LoopVectorizationCostModel - estimates the expected speedups due to
@@ -4804,9 +4812,9 @@
switch (ScalarEpilogueStatus) {
case CM_ScalarEpilogueAllowed:
return computeFeasibleMaxVF(TC);
- case CM_ScalarEpilogueNotNeededPredicatePragma:
+ case CM_ScalarEpilogueNotNeededUsePredicate:
LLVM_DEBUG(
- dbgs() << "LV: vector predicate hint found.\n"
+ dbgs() << "LV: vector predicate hint/switch found.\n"
<< "LV: Not allowing scalar epilogue, creating predicated "
<< "vector loop.\n");
break;
@@ -7298,8 +7306,8 @@
(F->hasOptSize() ||
llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI)))
SEL = CM_ScalarEpilogueNotAllowedOptSize;
- else if (Hints.getPredicate())
- SEL = CM_ScalarEpilogueNotNeededPredicatePragma;
+ else if (PreferPredicateOverEpilog || Hints.getPredicate())
+ SEL = CM_ScalarEpilogueNotNeededUsePredicate;
return SEL;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D66108.215026.patch
Type: text/x-patch
Size: 4234 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190814/fc181392/attachment.bin>
More information about the llvm-commits mailing list