[PATCH] D109368: [LV] Don't vectorize if we can prove RT + vector cost >= scalar cost.
Florian Hahn via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 7 08:35:47 PDT 2021
fhahn created this revision.
fhahn added reviewers: rengolin, Ayal, gilr, hsaito, lebedev.ri, ebrevnov.
Herald added a subscriber: hiraditya.
fhahn requested review of this revision.
Herald added a project: LLVM.
If we can prove that the cost of the runtime checks plus the total vector
loop cost is greater than or equal to the total scalar cost, vectorization
with runtime checks is not profitable.
This is a first step towards guarding, as the heuristics get tweaked,
against regressions in cases where we already know that runtime checks
are unprofitable.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D109368
Files:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/X86/pointer-runtime-checks-unprofitable.ll
Index: llvm/test/Transforms/LoopVectorize/X86/pointer-runtime-checks-unprofitable.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/X86/pointer-runtime-checks-unprofitable.ll
+++ llvm/test/Transforms/LoopVectorize/X86/pointer-runtime-checks-unprofitable.ll
@@ -1,4 +1,6 @@
-; RUN: opt -runtime-memory-check-threshold=9 -passes='loop-vectorize' -mtriple=x86_64-unknown-linux -S %s | FileCheck %s
+; REQUIRES: asserts
+
+; RUN: opt -runtime-memory-check-threshold=9 -passes='loop-vectorize' -mtriple=x86_64-unknown-linux -S -debug %s 2>&1 | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -10,9 +12,13 @@
; than running the scalar loop.
; TODO: should not be vectorized.
define void @test(double* nocapture %A, double* nocapture %B, double* nocapture %C, double* nocapture %D, double* nocapture %E) {
+; CHECK: LV: Vectorization is not beneficial due to runtime check cost
+;
; CHECK-LABEL: @test(
-; CHECK: vector.memcheck
-; CHECK: vector.body
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label %for.body
+; CHECK-NOT: vector.memcheck
+; CHECK-NOT: vector.body
;
entry:
br label %for.body
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2007,6 +2007,25 @@
}
}
+ InstructionCost getCost(LoopVectorizationCostModel &CM) {
+ InstructionCost RTCheckCost = 0;
+ if (SCEVCheckBlock)
+ for (Instruction &I : *SCEVCheckBlock) {
+ if (SCEVCheckBlock->getTerminator() == &I)
+ continue;
+ RTCheckCost +=
+ CM.getInstructionCost(&I, ElementCount::getFixed(1)).first;
+ }
+ if (MemCheckBlock)
+ for (Instruction &I : *MemCheckBlock) {
+ if (MemCheckBlock->getTerminator() == &I)
+ continue;
+ RTCheckCost +=
+ CM.getInstructionCost(&I, ElementCount::getFixed(1)).first;
+ }
+ return RTCheckCost;
+ }
+
/// Remove the created SCEV & memory runtime check blocks & instructions, if
/// unused.
~GeneratedRTChecks() {
@@ -3304,7 +3323,6 @@
}
BasicBlock *InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) {
-
BasicBlock *const SCEVCheckBlock =
RTChecks.emitSCEVChecks(L, Bypass, LoopVectorPreHeader, LoopExitBlock);
if (!SCEVCheckBlock)
@@ -8164,7 +8182,29 @@
if (!SelectedVF.Width.isScalar())
Checks.Create(OrigLoop, *Legal->getLAI(), PSE.getUnionPredicate());
+ bool ForceVectorization = Hints.getForce() == LoopVectorizeHints::FK_Enabled;
// Check if it is profitable to vectorize with runtime checks.
+ if (!ForceVectorization && SelectedVF.Width.getKnownMinValue() > 1) {
+ if (auto ExpectedTC = getSmallBestKnownTC(*PSE.getSE(), OrigLoop)) {
+ InstructionCost RTCost = Checks.getCost(CM);
+ // The total scalar cost is ScalarCost * ExpectedTC and the total vector
+ // cost is (VectorCost / Width) * ExpectedTC. To avoid dividing by a small
+ // number, we multiply ScalarCost * Width instead. To avoid multiplying
+ // with a potential large trip count, we divide by ExpectedTC.
+ InstructionCost ScalarCost =
+ SelectedVF.ScalarCost * SelectedVF.Width.getKnownMinValue();
+ if (ScalarCost <= (RTCost / double(*ExpectedTC) + SelectedVF.Cost)) {
+ LLVM_DEBUG(dbgs() << "LV: Vectorization is not beneficial due to "
+ "runtime check cost (scalar cost ("
+ << ScalarCost << ") <= runtime check + vector cost ("
+ << (RTCost / double(*ExpectedTC) + SelectedVF.Cost)
+ << ")\n");
+
+ return None;
+ }
+ }
+ }
+
unsigned NumRuntimePointerChecks = Requirements.getNumRuntimePointerChecks();
if (SelectedVF.Width.getKnownMinValue() > 1 && NumRuntimePointerChecks) {
bool PragmaThresholdReached =
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D109368.371090.patch
Type: text/x-patch
Size: 4119 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210907/baa85cf4/attachment.bin>
More information about the llvm-commits
mailing list