[llvm] r295538 - Increases full-unroll threshold.
Dehao Chen via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 17 19:46:52 PST 2017
Author: dehao
Date: Fri Feb 17 21:46:51 2017
New Revision: 295538
URL: http://llvm.org/viewvc/llvm-project?rev=295538&view=rev
Log:
Increases full-unroll threshold.
Summary:
The default threshold for full unrolling is too conservative. This patch doubles the full-unroll threshold (from 150 to 300) when the optimization level is above 2, i.e. at -O3.
This change affects the following SPEC CPU2006 benchmarks (performance numbers were collected on Intel Sandy Bridge):
Performance:
403 0.11%
433 0.51%
445 0.48%
447 3.50%
453 1.49%
464 0.75%
Code size:
403 0.56%
433 0.96%
445 2.16%
447 2.96%
453 0.94%
464 8.02%
The compile-time overhead is similar to the code-size increase.
Reviewers: davidxl, mkuper, mzolotukhin, hfinkel, chandlerc
Reviewed By: hfinkel, chandlerc
Subscribers: mehdi_amini, zzheng, efriedma, haicheng, hfinkel, llvm-commits
Differential Revision: https://reviews.llvm.org/D28368
Modified:
llvm/trunk/include/llvm/Transforms/Scalar.h
llvm/trunk/include/llvm/Transforms/Scalar/LoopUnrollPass.h
llvm/trunk/lib/Passes/PassBuilder.cpp
llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp
llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
llvm/trunk/test/Transforms/LoopVectorize/X86/metadata-enable.ll
Modified: llvm/trunk/include/llvm/Transforms/Scalar.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Scalar.h?rev=295538&r1=295537&r2=295538&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Scalar.h (original)
+++ llvm/trunk/include/llvm/Transforms/Scalar.h Fri Feb 17 21:46:51 2017
@@ -181,11 +181,11 @@ Pass *createLoopInstSimplifyPass();
//
// LoopUnroll - This pass is a simple loop unrolling pass.
//
-Pass *createLoopUnrollPass(int Threshold = -1, int Count = -1,
+Pass *createLoopUnrollPass(int OptLevel = 2, int Threshold = -1, int Count = -1,
int AllowPartial = -1, int Runtime = -1,
int UpperBound = -1);
// Create an unrolling pass for full unrolling that uses exact trip count only.
-Pass *createSimpleLoopUnrollPass();
+Pass *createSimpleLoopUnrollPass(int OptLevel);
//===----------------------------------------------------------------------===//
//
Modified: llvm/trunk/include/llvm/Transforms/Scalar/LoopUnrollPass.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Scalar/LoopUnrollPass.h?rev=295538&r1=295537&r2=295538&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Scalar/LoopUnrollPass.h (original)
+++ llvm/trunk/include/llvm/Transforms/Scalar/LoopUnrollPass.h Fri Feb 17 21:46:51 2017
@@ -18,9 +18,10 @@ namespace llvm {
class LoopUnrollPass : public PassInfoMixin<LoopUnrollPass> {
const bool AllowPartialUnrolling;
+ const int OptLevel;
- explicit LoopUnrollPass(bool AllowPartialUnrolling)
- : AllowPartialUnrolling(AllowPartialUnrolling) {}
+ explicit LoopUnrollPass(bool AllowPartialUnrolling, int OptLevel)
+ : AllowPartialUnrolling(AllowPartialUnrolling), OptLevel(OptLevel) {}
public:
/// Create an instance of the loop unroll pass that will support both full
@@ -28,16 +29,16 @@ public:
///
/// This uses the target information (or flags) to control the thresholds for
/// different unrolling stategies but supports all of them.
- static LoopUnrollPass create() {
- return LoopUnrollPass(/*AllowPartialUnrolling*/ true);
+ static LoopUnrollPass create(int OptLevel = 2) {
+ return LoopUnrollPass(/*AllowPartialUnrolling*/ true, OptLevel);
}
/// Create an instance of the loop unroll pass that only does full loop
/// unrolling.
///
/// This will disable any runtime or partial unrolling.
- static LoopUnrollPass createFull() {
- return LoopUnrollPass(/*AllowPartialUnrolling*/ false);
+ static LoopUnrollPass createFull(int OptLevel = 2) {
+ return LoopUnrollPass(/*AllowPartialUnrolling*/ false, OptLevel);
}
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
Modified: llvm/trunk/lib/Passes/PassBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Passes/PassBuilder.cpp?rev=295538&r1=295537&r2=295538&view=diff
==============================================================================
--- llvm/trunk/lib/Passes/PassBuilder.cpp (original)
+++ llvm/trunk/lib/Passes/PassBuilder.cpp Fri Feb 17 21:46:51 2017
@@ -334,7 +334,7 @@ PassBuilder::buildFunctionSimplification
LPM2.addPass(IndVarSimplifyPass());
LPM2.addPass(LoopIdiomRecognizePass());
LPM2.addPass(LoopDeletionPass());
- LPM2.addPass(LoopUnrollPass::createFull());
+ LPM2.addPass(LoopUnrollPass::createFull(Level));
// We provide the opt remark emitter pass for LICM to use. We only need to do
// this once as it is immutable.
@@ -605,7 +605,7 @@ PassBuilder::buildPerModuleDefaultPipeli
// FIXME: It would be really good to use a loop-integrated instruction
// combiner for cleanup here so that the unrolling and LICM can be pipelined
// across the loop nests.
- OptimizePM.addPass(createFunctionToLoopPassAdaptor(LoopUnrollPass::create()));
+ OptimizePM.addPass(createFunctionToLoopPassAdaptor(LoopUnrollPass::create(Level)));
OptimizePM.addPass(InstCombinePass());
OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass()));
Modified: llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp?rev=295538&r1=295537&r2=295538&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp (original)
+++ llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp Fri Feb 17 21:46:51 2017
@@ -320,7 +320,7 @@ void PassManagerBuilder::addFunctionSimp
MPM.add(createCFGSimplificationPass());
}
if (!DisableUnrollLoops)
- MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops
+ MPM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
if (OptLevel > 1) {
@@ -366,7 +366,7 @@ void PassManagerBuilder::addFunctionSimp
// BBVectorize may have significantly shortened a loop body; unroll again.
if (!DisableUnrollLoops)
- MPM.add(createLoopUnrollPass());
+ MPM.add(createLoopUnrollPass(OptLevel));
}
}
@@ -612,7 +612,7 @@ void PassManagerBuilder::populateModuleP
// BBVectorize may have significantly shortened a loop body; unroll again.
if (!DisableUnrollLoops)
- MPM.add(createLoopUnrollPass());
+ MPM.add(createLoopUnrollPass(OptLevel));
}
}
@@ -621,7 +621,7 @@ void PassManagerBuilder::populateModuleP
addInstructionCombiningPass(MPM);
if (!DisableUnrollLoops) {
- MPM.add(createLoopUnrollPass()); // Unroll small loops
+ MPM.add(createLoopUnrollPass(OptLevel)); // Unroll small loops
// LoopUnroll may generate some redundency to cleanup.
addInstructionCombiningPass(MPM);
@@ -772,11 +772,11 @@ void PassManagerBuilder::addLTOOptimizat
PM.add(createLoopInterchangePass());
if (!DisableUnrollLoops)
- PM.add(createSimpleLoopUnrollPass()); // Unroll small loops
+ PM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops
PM.add(createLoopVectorizePass(true, LoopVectorize));
// The vectorizer may have significantly shortened a loop body; unroll again.
if (!DisableUnrollLoops)
- PM.add(createLoopUnrollPass());
+ PM.add(createLoopUnrollPass(OptLevel));
// Now that we've optimized loops (in particular loop induction variables),
// we may have exposed more scalar opportunities. Run parts of the scalar
Modified: llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp?rev=295538&r1=295537&r2=295538&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp Fri Feb 17 21:46:51 2017
@@ -131,13 +131,14 @@ static const unsigned NoThreshold = UINT
/// Gather the various unrolling parameters based on the defaults, compiler
/// flags, TTI overrides and user specified parameters.
static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
- Loop *L, const TargetTransformInfo &TTI, Optional<unsigned> UserThreshold,
- Optional<unsigned> UserCount, Optional<bool> UserAllowPartial,
- Optional<bool> UserRuntime, Optional<bool> UserUpperBound) {
+ Loop *L, const TargetTransformInfo &TTI, int OptLevel,
+ Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
+ Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
+ Optional<bool> UserUpperBound) {
TargetTransformInfo::UnrollingPreferences UP;
// Set up the defaults
- UP.Threshold = 150;
+ UP.Threshold = OptLevel > 2 ? 300 : 150;
UP.MaxPercentThresholdBoost = 400;
UP.OptSizeThreshold = 0;
UP.PartialThreshold = 150;
@@ -927,7 +928,7 @@ static bool computeUnrollCount(
static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
ScalarEvolution *SE, const TargetTransformInfo &TTI,
AssumptionCache &AC, OptimizationRemarkEmitter &ORE,
- bool PreserveLCSSA,
+ bool PreserveLCSSA, int OptLevel,
Optional<unsigned> ProvidedCount,
Optional<unsigned> ProvidedThreshold,
Optional<bool> ProvidedAllowPartial,
@@ -947,7 +948,7 @@ static bool tryToUnrollLoop(Loop *L, Dom
bool NotDuplicatable;
bool Convergent;
TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
- L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
+ L, TTI, OptLevel, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
ProvidedRuntime, ProvidedUpperBound);
// Exit early if unrolling is disabled.
if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0))
@@ -1047,16 +1048,17 @@ namespace {
class LoopUnroll : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopUnroll(Optional<unsigned> Threshold = None,
+ LoopUnroll(int OptLevel = 2, Optional<unsigned> Threshold = None,
Optional<unsigned> Count = None,
Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
Optional<bool> UpperBound = None)
- : LoopPass(ID), ProvidedCount(std::move(Count)),
+ : LoopPass(ID), OptLevel(OptLevel), ProvidedCount(std::move(Count)),
ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound) {
initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
}
+ int OptLevel;
Optional<unsigned> ProvidedCount;
Optional<unsigned> ProvidedThreshold;
Optional<bool> ProvidedAllowPartial;
@@ -1081,7 +1083,7 @@ public:
OptimizationRemarkEmitter ORE(&F);
bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
- return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA,
+ return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel,
ProvidedCount, ProvidedThreshold,
ProvidedAllowPartial, ProvidedRuntime,
ProvidedUpperBound);
@@ -1107,21 +1109,22 @@ INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
-Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
- int Runtime, int UpperBound) {
+Pass *llvm::createLoopUnrollPass(int OptLevel, int Threshold, int Count,
+ int AllowPartial, int Runtime,
+ int UpperBound) {
// TODO: It would make more sense for this function to take the optionals
// directly, but that's dangerous since it would silently break out of tree
// callers.
- return new LoopUnroll(Threshold == -1 ? None : Optional<unsigned>(Threshold),
- Count == -1 ? None : Optional<unsigned>(Count),
- AllowPartial == -1 ? None
- : Optional<bool>(AllowPartial),
- Runtime == -1 ? None : Optional<bool>(Runtime),
- UpperBound == -1 ? None : Optional<bool>(UpperBound));
+ return new LoopUnroll(
+ OptLevel, Threshold == -1 ? None : Optional<unsigned>(Threshold),
+ Count == -1 ? None : Optional<unsigned>(Count),
+ AllowPartial == -1 ? None : Optional<bool>(AllowPartial),
+ Runtime == -1 ? None : Optional<bool>(Runtime),
+ UpperBound == -1 ? None : Optional<bool>(UpperBound));
}
-Pass *llvm::createSimpleLoopUnrollPass() {
- return llvm::createLoopUnrollPass(-1, -1, 0, 0, 0);
+Pass *llvm::createSimpleLoopUnrollPass(int OptLevel) {
+ return llvm::createLoopUnrollPass(OptLevel, -1, -1, 0, 0, 0);
}
PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
@@ -1153,10 +1156,10 @@ PreservedAnalyses LoopUnrollPass::run(Lo
Optional<bool> AllowPartialParam, RuntimeParam, UpperBoundParam;
if (!AllowPartialUnrolling)
AllowPartialParam = RuntimeParam = UpperBoundParam = false;
- bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, &AR.SE, AR.TTI, AR.AC, *ORE,
- /*PreserveLCSSA*/ true, /*Count*/ None,
- /*Threshold*/ None, AllowPartialParam,
- RuntimeParam, UpperBoundParam);
+ bool Changed = tryToUnrollLoop(
+ &L, AR.DT, &AR.LI, &AR.SE, AR.TTI, AR.AC, *ORE,
+ /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
+ /*Threshold*/ None, AllowPartialParam, RuntimeParam, UpperBoundParam);
if (!Changed)
return PreservedAnalyses::all();
Modified: llvm/trunk/test/Transforms/LoopVectorize/X86/metadata-enable.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/X86/metadata-enable.ll?rev=295538&r1=295537&r2=295538&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/X86/metadata-enable.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/X86/metadata-enable.ll Fri Feb 17 21:46:51 2017
@@ -1,13 +1,14 @@
; RUN: opt < %s -mcpu=corei7 -O1 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1
; RUN: opt < %s -mcpu=corei7 -O2 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O2
-; RUN: opt < %s -mcpu=corei7 -O3 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3
+; RUN: opt < %s -mcpu=corei7 -O3 -S -unroll-threshold=150 -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3
+; RUN: opt < %s -mcpu=corei7 -O3 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DEFAULT
; RUN: opt < %s -mcpu=corei7 -Os -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=Os
; RUN: opt < %s -mcpu=corei7 -Oz -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=Oz
; RUN: opt < %s -mcpu=corei7 -O1 -vectorize-loops -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1VEC
; RUN: opt < %s -mcpu=corei7 -Oz -vectorize-loops -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=OzVEC
; RUN: opt < %s -mcpu=corei7 -O1 -loop-vectorize -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1VEC2
; RUN: opt < %s -mcpu=corei7 -Oz -loop-vectorize -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=OzVEC2
-; RUN: opt < %s -mcpu=corei7 -O3 -disable-loop-vectorization -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DIS
+; RUN: opt < %s -mcpu=corei7 -O3 -unroll-threshold=150 -disable-loop-vectorization -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DIS
; This file tests the llvm.loop.vectorize.enable metadata forcing
; vectorization even when optimization levels are too low, or when
@@ -25,6 +26,9 @@ target triple = "x86_64-unknown-linux-gn
; O3-LABEL: @enabled(
; O3: store <4 x i32>
; O3: ret i32
+; O3DEFAULT-LABEL: @enabled(
+; O3DEFAULT: store <4 x i32>
+; O3DEFAULT: ret i32
; Pragma always wins!
; O3DIS-LABEL: @enabled(
; O3DIS: store <4 x i32>
@@ -77,6 +81,9 @@ for.end:
; O3-LABEL: @nopragma(
; O3: store <4 x i32>
; O3: ret i32
+; O3DEFAULT-LABEL: @nopragma(
+; O3DEFAULT: store <4 x i32>
+; O3DEFAULT: ret i32
; O3DIS-LABEL: @nopragma(
; O3DIS-NOT: store <4 x i32>
; O3DIS: ret i32
@@ -128,6 +135,9 @@ for.end:
; O3-LABEL: @disabled(
; O3-NOT: store <4 x i32>
; O3: ret i32
+; O3DEFAULT-LABEL: @disabled(
+; O3DEFAULT: store <4 x i32>
+; O3DEFAULT: ret i32
; O3DIS-LABEL: @disabled(
; O3DIS-NOT: store <4 x i32>
; O3DIS: ret i32
More information about the llvm-commits
mailing list