[llvm] 9392c0d - Revert "[SLP] Remove cap on schedule window size"
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 23 13:12:16 PST 2022
Author: Philip Reames
Date: 2022-02-23T13:12:07-08:00
New Revision: 9392c0d4efc1aa6cb1f20c4b42cc4e2595fda265
URL: https://github.com/llvm/llvm-project/commit/9392c0d4efc1aa6cb1f20c4b42cc4e2595fda265
DIFF: https://github.com/llvm/llvm-project/commit/9392c0d4efc1aa6cb1f20c4b42cc4e2595fda265.diff
LOG: Revert "[SLP] Remove cap on schedule window size"
This reverts commit 6adf4b039e095224edbbecda5972e5e3353b53b6. Reverting while investigating https://github.com/llvm/llvm-project/issues/54029
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll
llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle-inseltpoison.ll
llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle.ll
llvm/test/Transforms/SLPVectorizer/X86/schedule_budget.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 47e23fb636e57..12ab69aa74cea 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -138,6 +138,14 @@ static cl::opt<int>
MaxStoreLookup("slp-max-store-lookup", cl::init(32), cl::Hidden,
cl::desc("Maximum depth of the lookup for consecutive stores."));
+/// Limits the size of scheduling regions in a block.
+/// It avoid long compile times for _very_ large blocks where vector
+/// instructions are spread over a wide range.
+/// This limit is way higher than needed by real-world functions.
+static cl::opt<int>
+ScheduleRegionSizeBudget("slp-schedule-budget", cl::init(100000), cl::Hidden,
+ cl::desc("Limit the size of the SLP scheduling region per block"));
+
static cl::opt<int> MinVectorRegSizeOption(
"slp-min-reg-size", cl::init(128), cl::Hidden,
cl::desc("Attempt to vectorize for this register size in bits"));
@@ -169,6 +177,10 @@ static const unsigned AliasedCheckLimit = 10;
// This limit is useful for very large basic blocks.
static const unsigned MaxMemDepDistance = 160;
+/// If the ScheduleRegionSizeBudget is exhausted, we allow small scheduling
+/// regions to be handled.
+static const int MinScheduleRegionSize = 16;
+
/// Predicate for the element types that the SLP vectorizer supports.
///
/// The most important thing to filter here are types which are invalid in LLVM
@@ -2612,6 +2624,13 @@ class BoUpSLP {
FirstLoadStoreInRegion = nullptr;
LastLoadStoreInRegion = nullptr;
+ // Reduce the maximum schedule region size by the size of the
+ // previous scheduling run.
+ ScheduleRegionSizeLimit -= ScheduleRegionSize;
+ if (ScheduleRegionSizeLimit < MinScheduleRegionSize)
+ ScheduleRegionSizeLimit = MinScheduleRegionSize;
+ ScheduleRegionSize = 0;
+
// Make a new scheduling region, i.e. all existing ScheduleData is not
// in the new region yet.
++SchedulingRegionID;
@@ -2792,7 +2811,7 @@ class BoUpSLP {
/// Extends the scheduling region so that V is inside the region.
/// \returns true if the region size is within the limit.
- void extendSchedulingRegion(Value *V, const InstructionsState &S);
+ bool extendSchedulingRegion(Value *V, const InstructionsState &S);
/// Initialize the ScheduleData structures for new instructions in the
/// scheduling region.
@@ -2846,6 +2865,12 @@ class BoUpSLP {
/// (can be null).
ScheduleData *LastLoadStoreInRegion = nullptr;
+ /// The current size of the scheduling region.
+ int ScheduleRegionSize = 0;
+
+ /// The maximum size allowed for the scheduling region.
+ int ScheduleRegionSizeLimit = ScheduleRegionSizeBudget;
+
/// The ID of the scheduling region. For a new vectorization iteration this
/// is incremented which "removes" all ScheduleData from the region.
/// Make sure that the initial SchedulingRegionID is greater than the
@@ -7489,9 +7514,11 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
doForAllOpcodes(I, [](ScheduleData *SD) { SD->clearDependencies(); });
ReSchedule = true;
}
- LLVM_DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle
- << " in block " << BB->getName() << "\n");
- calculateDependencies(Bundle, /*InsertInReadyList=*/true, SLP);
+ if (Bundle) {
+ LLVM_DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle
+ << " in block " << BB->getName() << "\n");
+ calculateDependencies(Bundle, /*InsertInReadyList=*/true, SLP);
+ }
if (ReSchedule) {
resetSchedule();
@@ -7502,7 +7529,8 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
// dependencies. As soon as the bundle is "ready" it means that there are no
// cyclic dependencies and we can schedule it. Note that's important that we
// don't "schedule" the bundle yet (see cancelScheduling).
- while (!Bundle->isReady() && !ReadyInsts.empty()) {
+ while (((!Bundle && ReSchedule) || (Bundle && !Bundle->isReady())) &&
+ !ReadyInsts.empty()) {
ScheduleData *Picked = ReadyInsts.pop_back_val();
assert(Picked->isSchedulingEntity() && Picked->isReady() &&
"must be ready to schedule");
@@ -7512,8 +7540,18 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
// Make sure that the scheduling region contains all
// instructions of the bundle.
- for (Value *V : VL)
- extendSchedulingRegion(V, S);
+ for (Value *V : VL) {
+ if (!extendSchedulingRegion(V, S)) {
+ // If the scheduling region got new instructions at the lower end (or it
+ // is a new region for the first bundle). This makes it necessary to
+ // recalculate all dependencies.
+ // Otherwise the compiler may crash trying to incorrectly calculate
+ // dependencies and emit instruction in the wrong order at the actual
+ // scheduling.
+ TryScheduleBundleImpl(/*ReSchedule=*/false, nullptr);
+ return None;
+ }
+ }
bool ReSchedule = false;
for (Value *V : VL) {
@@ -7583,11 +7621,10 @@ BoUpSLP::ScheduleData *BoUpSLP::BlockScheduling::allocateScheduleDataChunks() {
return &(ScheduleDataChunks.back()[ChunkPos++]);
}
-void
-BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
- const InstructionsState &S) {
+bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
+ const InstructionsState &S) {
if (getScheduleData(V, isOneOf(S, V)))
- return;
+ return true;
Instruction *I = dyn_cast<Instruction>(V);
assert(I && "bundle member must be an instruction");
assert(!isa<PHINode>(I) && !isVectorLikeInstWithConstOps(I) &&
@@ -7606,7 +7643,7 @@ BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
return true;
};
if (CheckSheduleForI(I))
- return;
+ return true;
if (!ScheduleStart) {
// It's the first instruction in the new region.
initScheduleData(I, I->getNextNode(), nullptr, nullptr);
@@ -7616,7 +7653,7 @@ BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
CheckSheduleForI(I);
assert(ScheduleEnd && "tried to vectorize a terminator?");
LLVM_DEBUG(dbgs() << "SLP: initialize schedule region to " << *I << "\n");
- return;
+ return true;
}
// Search up and down at the same time, because we don't know if the new
// instruction is above or below the existing scheduling region.
@@ -7627,6 +7664,11 @@ BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
BasicBlock::iterator LowerEnd = BB->end();
while (UpIter != UpperEnd && DownIter != LowerEnd && &*UpIter != I &&
&*DownIter != I) {
+ if (++ScheduleRegionSize > ScheduleRegionSizeLimit) {
+ LLVM_DEBUG(dbgs() << "SLP: exceeded schedule region size limit\n");
+ return false;
+ }
+
++UpIter;
++DownIter;
}
@@ -7639,7 +7681,7 @@ BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
CheckSheduleForI(I);
LLVM_DEBUG(dbgs() << "SLP: extend schedule region start to " << *I
<< "\n");
- return;
+ return true;
}
assert((UpIter == UpperEnd || (DownIter != LowerEnd && &*DownIter == I)) &&
"Expected to reach top of the basic block or instruction down the "
@@ -7653,7 +7695,7 @@ BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
CheckSheduleForI(I);
assert(ScheduleEnd && "tried to vectorize a terminator?");
LLVM_DEBUG(dbgs() << "SLP: extend schedule region end to " << *I << "\n");
- return;
+ return true;
}
void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
index 1faadaba2bd72..e9c502b6982cd 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -slp-vectorizer -S | FileCheck %s --check-prefix=DEFAULT
-; RUN: opt < %s -slp-min-tree-size=0 -slp-threshold=-30 -slp-vectorizer -S | FileCheck %s --check-prefix=GATHER
-; RUN: opt < %s -slp-threshold=-30 -slp-vectorizer -S | FileCheck %s --check-prefix=MAX-COST
+; RUN: opt < %s -slp-schedule-budget=0 -slp-min-tree-size=0 -slp-threshold=-30 -slp-vectorizer -S | FileCheck %s --check-prefix=GATHER
+; RUN: opt < %s -slp-schedule-budget=0 -slp-threshold=-30 -slp-vectorizer -S | FileCheck %s --check-prefix=MAX-COST
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"
@@ -35,14 +35,41 @@ define void @PR28330(i32 %n) {
;
; MAX-COST-LABEL: @PR28330(
; MAX-COST-NEXT: entry:
-; MAX-COST-NEXT: [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <8 x i8>*), align 1
-; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
+; MAX-COST-NEXT: [[P0:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1
+; MAX-COST-NEXT: [[P1:%.*]] = icmp eq i8 [[P0]], 0
+; MAX-COST-NEXT: [[P2:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2
+; MAX-COST-NEXT: [[P3:%.*]] = icmp eq i8 [[P2]], 0
+; MAX-COST-NEXT: [[P4:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1
+; MAX-COST-NEXT: [[P5:%.*]] = icmp eq i8 [[P4]], 0
+; MAX-COST-NEXT: [[P6:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4
+; MAX-COST-NEXT: [[P7:%.*]] = icmp eq i8 [[P6]], 0
+; MAX-COST-NEXT: [[P8:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1
+; MAX-COST-NEXT: [[P9:%.*]] = icmp eq i8 [[P8]], 0
+; MAX-COST-NEXT: [[P10:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2
+; MAX-COST-NEXT: [[P11:%.*]] = icmp eq i8 [[P10]], 0
+; MAX-COST-NEXT: [[P12:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1
+; MAX-COST-NEXT: [[P13:%.*]] = icmp eq i8 [[P12]], 0
+; MAX-COST-NEXT: [[P14:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8
+; MAX-COST-NEXT: [[P15:%.*]] = icmp eq i8 [[P14]], 0
; MAX-COST-NEXT: br label [[FOR_BODY:%.*]]
; MAX-COST: for.body:
-; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; MAX-COST-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
-; MAX-COST-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
-; MAX-COST-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], [[P17]]
+; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[P34:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; MAX-COST-NEXT: [[P19:%.*]] = select i1 [[P1]], i32 -720, i32 -80
+; MAX-COST-NEXT: [[P20:%.*]] = add i32 [[P17]], [[P19]]
+; MAX-COST-NEXT: [[P21:%.*]] = select i1 [[P3]], i32 -720, i32 -80
+; MAX-COST-NEXT: [[P22:%.*]] = add i32 [[P20]], [[P21]]
+; MAX-COST-NEXT: [[P23:%.*]] = select i1 [[P5]], i32 -720, i32 -80
+; MAX-COST-NEXT: [[P24:%.*]] = add i32 [[P22]], [[P23]]
+; MAX-COST-NEXT: [[P25:%.*]] = select i1 [[P7]], i32 -720, i32 -80
+; MAX-COST-NEXT: [[P26:%.*]] = add i32 [[P24]], [[P25]]
+; MAX-COST-NEXT: [[P27:%.*]] = select i1 [[P9]], i32 -720, i32 -80
+; MAX-COST-NEXT: [[P28:%.*]] = add i32 [[P26]], [[P27]]
+; MAX-COST-NEXT: [[P29:%.*]] = select i1 [[P11]], i32 -720, i32 -80
+; MAX-COST-NEXT: [[P30:%.*]] = add i32 [[P28]], [[P29]]
+; MAX-COST-NEXT: [[P31:%.*]] = select i1 [[P13]], i32 -720, i32 -80
+; MAX-COST-NEXT: [[P32:%.*]] = add i32 [[P30]], [[P31]]
+; MAX-COST-NEXT: [[P33:%.*]] = select i1 [[P15]], i32 -720, i32 -80
+; MAX-COST-NEXT: [[P34]] = add i32 [[P32]], [[P33]]
; MAX-COST-NEXT: br label [[FOR_BODY]]
;
entry:
@@ -112,14 +139,30 @@ define void @PR32038(i32 %n) {
;
; MAX-COST-LABEL: @PR32038(
; MAX-COST-NEXT: entry:
-; MAX-COST-NEXT: [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <8 x i8>*), align 1
-; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
+; MAX-COST-NEXT: [[TMP0:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <4 x i8>*), align 1
+; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[TMP0]], zeroinitializer
+; MAX-COST-NEXT: [[P8:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1
+; MAX-COST-NEXT: [[P9:%.*]] = icmp eq i8 [[P8]], 0
+; MAX-COST-NEXT: [[P10:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2
+; MAX-COST-NEXT: [[P11:%.*]] = icmp eq i8 [[P10]], 0
+; MAX-COST-NEXT: [[P12:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1
+; MAX-COST-NEXT: [[P13:%.*]] = icmp eq i8 [[P12]], 0
+; MAX-COST-NEXT: [[P14:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8
+; MAX-COST-NEXT: [[P15:%.*]] = icmp eq i8 [[P14]], 0
; MAX-COST-NEXT: br label [[FOR_BODY:%.*]]
; MAX-COST: for.body:
-; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; MAX-COST-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
-; MAX-COST-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
-; MAX-COST-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], -5
+; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[P34:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; MAX-COST-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -720, i32 -720, i32 -720, i32 -720>, <4 x i32> <i32 -80, i32 -80, i32 -80, i32 -80>
+; MAX-COST-NEXT: [[P27:%.*]] = select i1 [[P9]], i32 -720, i32 -80
+; MAX-COST-NEXT: [[P29:%.*]] = select i1 [[P11]], i32 -720, i32 -80
+; MAX-COST-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP2]])
+; MAX-COST-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], [[P27]]
+; MAX-COST-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], [[P29]]
+; MAX-COST-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP5]], -5
+; MAX-COST-NEXT: [[P31:%.*]] = select i1 [[P13]], i32 -720, i32 -80
+; MAX-COST-NEXT: [[P32:%.*]] = add i32 [[OP_EXTRA]], [[P31]]
+; MAX-COST-NEXT: [[P33:%.*]] = select i1 [[P15]], i32 -720, i32 -80
+; MAX-COST-NEXT: [[P34]] = add i32 [[P32]], [[P33]]
; MAX-COST-NEXT: br label [[FOR_BODY]]
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll
index 56f6b7b5d3588..7b6e6ca3c61af 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -slp-vectorizer -slp-min-tree-size=2 -slp-threshold=-1000 -slp-max-look-ahead-depth=1 -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+; RUN: opt < %s -slp-vectorizer -slp-min-tree-size=2 -slp-threshold=-1000 -slp-max-look-ahead-depth=1 -slp-schedule-budget=27 -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
define void @exceed(double %0, double %1) {
; CHECK-LABEL: @exceed(
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle-inseltpoison.ll
index a1b5f293602bd..293dcc0b1ef9e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle-inseltpoison.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -slp-vectorizer -S -o - -mtriple=x86_64-unknown-linux -mcpu=bdver2 | FileCheck %s
+; RUN: opt < %s -slp-vectorizer -S -o - -mtriple=x86_64-unknown-linux -mcpu=bdver2 -slp-schedule-budget=1 | FileCheck %s
define <2 x i8> @g(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @g(
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle.ll
index ecffc1adb793c..61f25dd713775 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -slp-vectorizer -S -o - -mtriple=x86_64-unknown-linux -mcpu=bdver2 | FileCheck %s
+; RUN: opt < %s -slp-vectorizer -S -o - -mtriple=x86_64-unknown-linux -mcpu=bdver2 -slp-schedule-budget=1 | FileCheck %s
define <2 x i8> @g(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @g(
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/schedule_budget.ll b/llvm/test/Transforms/SLPVectorizer/X86/schedule_budget.ll
index fa5534732b7a3..3e4cfe6e05157 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/schedule_budget.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/schedule_budget.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -basic-aa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+; RUN: opt < %s -basic-aa -slp-vectorizer -S -slp-schedule-budget=16 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
@@ -15,9 +15,6 @@ define void @test(float * %a, float * %b, float * %c, float * %d) {
; CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 1
; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds float, float* [[A]], i64 2
; CHECK-NEXT: [[A3:%.*]] = getelementptr inbounds float, float* [[A]], i64 3
-; CHECK-NEXT: [[B1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
-; CHECK-NEXT: [[B2:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
-; CHECK-NEXT: [[B3:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[A]] to <4 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
; CHECK-NEXT: call void @unknown()
@@ -48,6 +45,9 @@ define void @test(float * %a, float * %b, float * %c, float * %d) {
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: [[B1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
+; CHECK-NEXT: [[B2:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
+; CHECK-NEXT: [[B3:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[B]] to <4 x float>*
; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float>* [[TMP2]], align 4
; CHECK-NEXT: [[C1:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i64 1
More information about the llvm-commits
mailing list