[llvm] r248917 - SLPVectorizer: limit the scheduling region size per basic block.
Erik Eckstein via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 30 10:00:44 PDT 2015
Author: eeckstein
Date: Wed Sep 30 12:00:44 2015
New Revision: 248917
URL: http://llvm.org/viewvc/llvm-project?rev=248917&view=rev
Log:
SLPVectorizer: limit the scheduling region size per basic block.
Usually large blocks are not a problem. But if a large block (> 10k instructions)
contains many (potential) chains of vector instructions, and those chains are
spread over a wide range of instructions, then scheduling becomes a compile time problem.
This change introduces a limit for the accumulated scheduling region size of a block.
For real-world functions this limit will never be exceeded (it's about 10x larger than
the maximum value seen in the test-suite and external test suite).
Added:
llvm/trunk/test/Transforms/SLPVectorizer/X86/schedule_budget.ll
Modified:
llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=248917&r1=248916&r2=248917&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Wed Sep 30 12:00:44 2015
@@ -73,6 +73,14 @@ static cl::opt<int>
MaxVectorRegSizeOption("slp-max-reg-size", cl::init(128), cl::Hidden,
cl::desc("Attempt to vectorize for this register size in bits"));
+/// Limits the size of scheduling regions in a block.
+/// It avoids long compile times for _very_ large blocks where vector
+/// instructions are spread over a wide range.
+/// This limit is way higher than needed by real-world functions.
+static cl::opt<int>
+ScheduleRegionSizeBudget("slp-schedule-budget", cl::init(100000), cl::Hidden,
+ cl::desc("Limit the size of the SLP scheduling region per block"));
+
namespace {
// FIXME: Set this via cl::opt to allow overriding.
@@ -89,6 +97,10 @@ static const unsigned AliasedCheckLimit
// This limit is useful for very large basic blocks.
static const unsigned MaxMemDepDistance = 160;
+/// If the ScheduleRegionSizeBudget is exhausted, we allow small scheduling
+/// regions to be handled.
+static const int MinScheduleRegionSize = 16;
+
/// \brief Predicate for the element types that the SLP vectorizer supports.
///
/// The most important thing to filter here are types which are invalid in LLVM
@@ -720,6 +732,8 @@ private:
: BB(BB), ChunkSize(BB->size()), ChunkPos(ChunkSize),
ScheduleStart(nullptr), ScheduleEnd(nullptr),
FirstLoadStoreInRegion(nullptr), LastLoadStoreInRegion(nullptr),
+ ScheduleRegionSize(0),
+ ScheduleRegionSizeLimit(ScheduleRegionSizeBudget),
// Make sure that the initial SchedulingRegionID is greater than the
// initial SchedulingRegionID in ScheduleData (which is 0).
SchedulingRegionID(1) {}
@@ -731,6 +745,13 @@ private:
FirstLoadStoreInRegion = nullptr;
LastLoadStoreInRegion = nullptr;
+ // Reduce the maximum schedule region size by the size of the
+ // previous scheduling run.
+ ScheduleRegionSizeLimit -= ScheduleRegionSize;
+ if (ScheduleRegionSizeLimit < MinScheduleRegionSize)
+ ScheduleRegionSizeLimit = MinScheduleRegionSize;
+ ScheduleRegionSize = 0;
+
// Make a new scheduling region, i.e. all existing ScheduleData is not
// in the new region yet.
++SchedulingRegionID;
@@ -807,7 +828,8 @@ private:
void cancelScheduling(ArrayRef<Value *> VL);
/// Extends the scheduling region so that V is inside the region.
- void extendSchedulingRegion(Value *V);
+ /// \returns true if the region size is within the limit.
+ bool extendSchedulingRegion(Value *V);
/// Initialize the ScheduleData structures for new instructions in the
/// scheduling region.
@@ -861,6 +883,12 @@ private:
/// (can be null).
ScheduleData *LastLoadStoreInRegion;
+ /// The current size of the scheduling region.
+ int ScheduleRegionSize;
+
+ /// The maximum size allowed for the scheduling region.
+ int ScheduleRegionSizeLimit;
+
/// The ID of the scheduling region. For a new vectorization iteration this
/// is incremented which "removes" all ScheduleData from the region.
int SchedulingRegionID;
@@ -1080,7 +1108,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
if (!BS.tryScheduleBundle(VL, this)) {
DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n");
- BS.cancelScheduling(VL);
+ assert((!BS.getScheduleData(VL[0]) ||
+ !BS.getScheduleData(VL[0])->isPartOfBundle()) &&
+ "tryScheduleBundle should cancelScheduling on failure");
newTreeEntry(VL, false);
return;
}
@@ -2686,8 +2716,15 @@ bool BoUpSLP::BlockScheduling::trySchedu
ScheduleData *Bundle = nullptr;
bool ReSchedule = false;
DEBUG(dbgs() << "SLP: bundle: " << *VL[0] << "\n");
+
+ // Make sure that the scheduling region contains all
+ // instructions of the bundle.
+ for (Value *V : VL) {
+ if (!extendSchedulingRegion(V))
+ return false;
+ }
+
for (Value *V : VL) {
- extendSchedulingRegion(V);
ScheduleData *BundleMember = getScheduleData(V);
assert(BundleMember &&
"no ScheduleData for bundle member (maybe not in same basic block)");
@@ -2748,7 +2785,11 @@ bool BoUpSLP::BlockScheduling::trySchedu
schedule(pickedSD, ReadyInsts);
}
}
- return Bundle->isReady();
+ if (!Bundle->isReady()) {
+ cancelScheduling(VL);
+ return false;
+ }
+ return true;
}
void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL) {
@@ -2777,9 +2818,9 @@ void BoUpSLP::BlockScheduling::cancelSch
}
}
-void BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
+bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
if (getScheduleData(V))
- return;
+ return true;
Instruction *I = dyn_cast<Instruction>(V);
assert(I && "bundle member must be an instruction");
assert(!isa<PHINode>(I) && "phi nodes don't need to be scheduled");
@@ -2790,7 +2831,7 @@ void BoUpSLP::BlockScheduling::extendSch
ScheduleEnd = I->getNextNode();
assert(ScheduleEnd && "tried to vectorize a TerminatorInst?");
DEBUG(dbgs() << "SLP: initialize schedule region to " << *I << "\n");
- return;
+ return true;
}
// Search up and down at the same time, because we don't know if the new
// instruction is above or below the existing scheduling region.
@@ -2799,12 +2840,17 @@ void BoUpSLP::BlockScheduling::extendSch
BasicBlock::iterator DownIter(ScheduleEnd);
BasicBlock::iterator LowerEnd = BB->end();
for (;;) {
+ if (++ScheduleRegionSize > ScheduleRegionSizeLimit) {
+ DEBUG(dbgs() << "SLP: exceeded schedule region size limit\n");
+ return false;
+ }
+
if (UpIter != UpperEnd) {
if (&*UpIter == I) {
initScheduleData(I, ScheduleStart, nullptr, FirstLoadStoreInRegion);
ScheduleStart = I;
DEBUG(dbgs() << "SLP: extend schedule region start to " << *I << "\n");
- return;
+ return true;
}
UpIter++;
}
@@ -2815,13 +2861,14 @@ void BoUpSLP::BlockScheduling::extendSch
ScheduleEnd = I->getNextNode();
assert(ScheduleEnd && "tried to vectorize a TerminatorInst?");
DEBUG(dbgs() << "SLP: extend schedule region end to " << *I << "\n");
- return;
+ return true;
}
DownIter++;
}
assert((UpIter != UpperEnd || DownIter != LowerEnd) &&
"instruction not found in block");
}
+ return true;
}
void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
Added: llvm/trunk/test/Transforms/SLPVectorizer/X86/schedule_budget.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/schedule_budget.ll?rev=248917&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/schedule_budget.ll (added)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/schedule_budget.ll Wed Sep 30 12:00:44 2015
@@ -0,0 +1,66 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -slp-schedule-budget=16 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; Test if the budget for the scheduling region size works.
+; We test with a reduced budget of 16 which should prevent vectorizing the loads.
+
+declare void @unknown()
+
+; CHECK-LABEL: @test
+; CHECK: load float
+; CHECK: load float
+; CHECK: load float
+; CHECK: load float
+; CHECK: call void @unknown
+define void @test(float * %a, float * %b) {
+entry:
+ %l0 = load float, float* %a
+ %a1 = getelementptr inbounds float, float* %a, i64 1
+ %l1 = load float, float* %a1
+ %a2 = getelementptr inbounds float, float* %a, i64 2
+ %l2 = load float, float* %a2
+ %a3 = getelementptr inbounds float, float* %a, i64 3
+ %l3 = load float, float* %a3
+
+ ; some unrelated instructions in between to enlarge the scheduling region
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+
+ store float %l0, float* %b
+ %b1 = getelementptr inbounds float, float* %b, i64 1
+ store float %l1, float* %b1
+ %b2 = getelementptr inbounds float, float* %b, i64 2
+ store float %l2, float* %b2
+ %b3 = getelementptr inbounds float, float* %b, i64 3
+ store float %l3, float* %b3
+ ret void
+}
+
More information about the llvm-commits
mailing list