[llvm] r295538 - Increases full-unroll threshold.
Bill Seurer via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 20 13:40:25 PST 2017
This change breaks one of the test-suite tests (MultiSource/Applications/oggenc
fails at execution) when run by LNT on ppc64le. See
http://lab.llvm.org:8011/builders/clang-ppc64le-linux-lnt/builds/3129
Some info from a local run I did:
---------------------------------------------------------------
>>> =========
'/home/seurer/sandbox/test-2017-02-20_19-32-13/MultiSource/Applications/oggenc/oggenc'
Program
---------------------------------------------------------------
TEST-PASS: compile
/home/seurer/sandbox/test-2017-02-20_19-32-13/MultiSource/Applications/oggenc/oggenc
TEST-RESULT-compile-success: pass
TEST-RESULT-compile-hash: de7b7004f7c0796635bcfacf44886a6a
TEST-RESULT-compile-time: user 19.615300
TEST-RESULT-compile-real-time: real 19.785200
TEST-FAIL: exec
/home/seurer/sandbox/test-2017-02-20_19-32-13/MultiSource/Applications/oggenc/oggenc
TEST-RESULT-exec-time: user 0.1646
TEST-RESULT-exec-real-time: real 0.1680
On 02/17/2017 08:46 PM, Dehao Chen via llvm-commits wrote:
> Author: dehao
> Date: Fri Feb 17 21:46:51 2017
> New Revision: 295538
>
> URL: http://llvm.org/viewvc/llvm-project?rev=295538&view=rev
> Log:
> Increases full-unroll threshold.
>
> Summary:
> The default threshold for full unrolling is too conservative. This patch doubles the full-unroll threshold.
>
> This change will affect the following SPEC CPU2006 benchmarks (performance numbers were collected on Intel Sandy Bridge):
>
> Performance:
>
> 403 0.11%
> 433 0.51%
> 445 0.48%
> 447 3.50%
> 453 1.49%
> 464 0.75%
>
> Code size:
>
> 403 0.56%
> 433 0.96%
> 445 2.16%
> 447 2.96%
> 453 0.94%
> 464 8.02%
>
> The compile-time overhead is similar to the code-size increase.
>
> Reviewers: davidxl, mkuper, mzolotukhin, hfinkel, chandlerc
>
> Reviewed By: hfinkel, chandlerc
>
> Subscribers: mehdi_amini, zzheng, efriedma, haicheng, hfinkel, llvm-commits
>
> Differential Revision: https://reviews.llvm.org/D28368
>
> Modified:
> llvm/trunk/include/llvm/Transforms/Scalar.h
> llvm/trunk/include/llvm/Transforms/Scalar/LoopUnrollPass.h
> llvm/trunk/lib/Passes/PassBuilder.cpp
> llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp
> llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
> llvm/trunk/test/Transforms/LoopVectorize/X86/metadata-enable.ll
>
> Modified: llvm/trunk/include/llvm/Transforms/Scalar.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Scalar.h?rev=295538&r1=295537&r2=295538&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Transforms/Scalar.h (original)
> +++ llvm/trunk/include/llvm/Transforms/Scalar.h Fri Feb 17 21:46:51 2017
> @@ -181,11 +181,11 @@ Pass *createLoopInstSimplifyPass();
> //
> // LoopUnroll - This pass is a simple loop unrolling pass.
> //
> -Pass *createLoopUnrollPass(int Threshold = -1, int Count = -1,
> +Pass *createLoopUnrollPass(int OptLevel = 2, int Threshold = -1, int Count = -1,
> int AllowPartial = -1, int Runtime = -1,
> int UpperBound = -1);
> // Create an unrolling pass for full unrolling that uses exact trip count only.
> -Pass *createSimpleLoopUnrollPass();
> +Pass *createSimpleLoopUnrollPass(int OptLevel);
>
> //===----------------------------------------------------------------------===//
> //
>
> Modified: llvm/trunk/include/llvm/Transforms/Scalar/LoopUnrollPass.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Scalar/LoopUnrollPass.h?rev=295538&r1=295537&r2=295538&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Transforms/Scalar/LoopUnrollPass.h (original)
> +++ llvm/trunk/include/llvm/Transforms/Scalar/LoopUnrollPass.h Fri Feb 17 21:46:51 2017
> @@ -18,9 +18,10 @@ namespace llvm {
>
> class LoopUnrollPass : public PassInfoMixin<LoopUnrollPass> {
> const bool AllowPartialUnrolling;
> + const int OptLevel;
>
> - explicit LoopUnrollPass(bool AllowPartialUnrolling)
> - : AllowPartialUnrolling(AllowPartialUnrolling) {}
> + explicit LoopUnrollPass(bool AllowPartialUnrolling, int OptLevel)
> + : AllowPartialUnrolling(AllowPartialUnrolling), OptLevel(OptLevel) {}
>
> public:
> /// Create an instance of the loop unroll pass that will support both full
> @@ -28,16 +29,16 @@ public:
> ///
> /// This uses the target information (or flags) to control the thresholds for
> /// different unrolling stategies but supports all of them.
> - static LoopUnrollPass create() {
> - return LoopUnrollPass(/*AllowPartialUnrolling*/ true);
> + static LoopUnrollPass create(int OptLevel = 2) {
> + return LoopUnrollPass(/*AllowPartialUnrolling*/ true, OptLevel);
> }
>
> /// Create an instance of the loop unroll pass that only does full loop
> /// unrolling.
> ///
> /// This will disable any runtime or partial unrolling.
> - static LoopUnrollPass createFull() {
> - return LoopUnrollPass(/*AllowPartialUnrolling*/ false);
> + static LoopUnrollPass createFull(int OptLevel = 2) {
> + return LoopUnrollPass(/*AllowPartialUnrolling*/ false, OptLevel);
> }
>
> PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
>
> Modified: llvm/trunk/lib/Passes/PassBuilder.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Passes/PassBuilder.cpp?rev=295538&r1=295537&r2=295538&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Passes/PassBuilder.cpp (original)
> +++ llvm/trunk/lib/Passes/PassBuilder.cpp Fri Feb 17 21:46:51 2017
> @@ -334,7 +334,7 @@ PassBuilder::buildFunctionSimplification
> LPM2.addPass(IndVarSimplifyPass());
> LPM2.addPass(LoopIdiomRecognizePass());
> LPM2.addPass(LoopDeletionPass());
> - LPM2.addPass(LoopUnrollPass::createFull());
> + LPM2.addPass(LoopUnrollPass::createFull(Level));
>
> // We provide the opt remark emitter pass for LICM to use. We only need to do
> // this once as it is immutable.
> @@ -605,7 +605,7 @@ PassBuilder::buildPerModuleDefaultPipeli
> // FIXME: It would be really good to use a loop-integrated instruction
> // combiner for cleanup here so that the unrolling and LICM can be pipelined
> // across the loop nests.
> - OptimizePM.addPass(createFunctionToLoopPassAdaptor(LoopUnrollPass::create()));
> + OptimizePM.addPass(createFunctionToLoopPassAdaptor(LoopUnrollPass::create(Level)));
> OptimizePM.addPass(InstCombinePass());
> OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
> OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass()));
>
> Modified: llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp?rev=295538&r1=295537&r2=295538&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp (original)
> +++ llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp Fri Feb 17 21:46:51 2017
> @@ -320,7 +320,7 @@ void PassManagerBuilder::addFunctionSimp
> MPM.add(createCFGSimplificationPass());
> }
> if (!DisableUnrollLoops)
> - MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops
> + MPM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops
> addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
>
> if (OptLevel > 1) {
> @@ -366,7 +366,7 @@ void PassManagerBuilder::addFunctionSimp
>
> // BBVectorize may have significantly shortened a loop body; unroll again.
> if (!DisableUnrollLoops)
> - MPM.add(createLoopUnrollPass());
> + MPM.add(createLoopUnrollPass(OptLevel));
> }
> }
>
> @@ -612,7 +612,7 @@ void PassManagerBuilder::populateModuleP
>
> // BBVectorize may have significantly shortened a loop body; unroll again.
> if (!DisableUnrollLoops)
> - MPM.add(createLoopUnrollPass());
> + MPM.add(createLoopUnrollPass(OptLevel));
> }
> }
>
> @@ -621,7 +621,7 @@ void PassManagerBuilder::populateModuleP
> addInstructionCombiningPass(MPM);
>
> if (!DisableUnrollLoops) {
> - MPM.add(createLoopUnrollPass()); // Unroll small loops
> + MPM.add(createLoopUnrollPass(OptLevel)); // Unroll small loops
>
> // LoopUnroll may generate some redundency to cleanup.
> addInstructionCombiningPass(MPM);
> @@ -772,11 +772,11 @@ void PassManagerBuilder::addLTOOptimizat
> PM.add(createLoopInterchangePass());
>
> if (!DisableUnrollLoops)
> - PM.add(createSimpleLoopUnrollPass()); // Unroll small loops
> + PM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops
> PM.add(createLoopVectorizePass(true, LoopVectorize));
> // The vectorizer may have significantly shortened a loop body; unroll again.
> if (!DisableUnrollLoops)
> - PM.add(createLoopUnrollPass());
> + PM.add(createLoopUnrollPass(OptLevel));
>
> // Now that we've optimized loops (in particular loop induction variables),
> // we may have exposed more scalar opportunities. Run parts of the scalar
>
> Modified: llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp?rev=295538&r1=295537&r2=295538&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp (original)
> +++ llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp Fri Feb 17 21:46:51 2017
> @@ -131,13 +131,14 @@ static const unsigned NoThreshold = UINT
> /// Gather the various unrolling parameters based on the defaults, compiler
> /// flags, TTI overrides and user specified parameters.
> static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
> - Loop *L, const TargetTransformInfo &TTI, Optional<unsigned> UserThreshold,
> - Optional<unsigned> UserCount, Optional<bool> UserAllowPartial,
> - Optional<bool> UserRuntime, Optional<bool> UserUpperBound) {
> + Loop *L, const TargetTransformInfo &TTI, int OptLevel,
> + Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
> + Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
> + Optional<bool> UserUpperBound) {
> TargetTransformInfo::UnrollingPreferences UP;
>
> // Set up the defaults
> - UP.Threshold = 150;
> + UP.Threshold = OptLevel > 2 ? 300 : 150;
> UP.MaxPercentThresholdBoost = 400;
> UP.OptSizeThreshold = 0;
> UP.PartialThreshold = 150;
> @@ -927,7 +928,7 @@ static bool computeUnrollCount(
> static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
> ScalarEvolution *SE, const TargetTransformInfo &TTI,
> AssumptionCache &AC, OptimizationRemarkEmitter &ORE,
> - bool PreserveLCSSA,
> + bool PreserveLCSSA, int OptLevel,
> Optional<unsigned> ProvidedCount,
> Optional<unsigned> ProvidedThreshold,
> Optional<bool> ProvidedAllowPartial,
> @@ -947,7 +948,7 @@ static bool tryToUnrollLoop(Loop *L, Dom
> bool NotDuplicatable;
> bool Convergent;
> TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
> - L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
> + L, TTI, OptLevel, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
> ProvidedRuntime, ProvidedUpperBound);
> // Exit early if unrolling is disabled.
> if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0))
> @@ -1047,16 +1048,17 @@ namespace {
> class LoopUnroll : public LoopPass {
> public:
> static char ID; // Pass ID, replacement for typeid
> - LoopUnroll(Optional<unsigned> Threshold = None,
> + LoopUnroll(int OptLevel = 2, Optional<unsigned> Threshold = None,
> Optional<unsigned> Count = None,
> Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
> Optional<bool> UpperBound = None)
> - : LoopPass(ID), ProvidedCount(std::move(Count)),
> + : LoopPass(ID), OptLevel(OptLevel), ProvidedCount(std::move(Count)),
> ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
> ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound) {
> initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
> }
>
> + int OptLevel;
> Optional<unsigned> ProvidedCount;
> Optional<unsigned> ProvidedThreshold;
> Optional<bool> ProvidedAllowPartial;
> @@ -1081,7 +1083,7 @@ public:
> OptimizationRemarkEmitter ORE(&F);
> bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
>
> - return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA,
> + return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel,
> ProvidedCount, ProvidedThreshold,
> ProvidedAllowPartial, ProvidedRuntime,
> ProvidedUpperBound);
> @@ -1107,21 +1109,22 @@ INITIALIZE_PASS_DEPENDENCY(LoopPass)
> INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
> INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
>
> -Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
> - int Runtime, int UpperBound) {
> +Pass *llvm::createLoopUnrollPass(int OptLevel, int Threshold, int Count,
> + int AllowPartial, int Runtime,
> + int UpperBound) {
> // TODO: It would make more sense for this function to take the optionals
> // directly, but that's dangerous since it would silently break out of tree
> // callers.
> - return new LoopUnroll(Threshold == -1 ? None : Optional<unsigned>(Threshold),
> - Count == -1 ? None : Optional<unsigned>(Count),
> - AllowPartial == -1 ? None
> - : Optional<bool>(AllowPartial),
> - Runtime == -1 ? None : Optional<bool>(Runtime),
> - UpperBound == -1 ? None : Optional<bool>(UpperBound));
> + return new LoopUnroll(
> + OptLevel, Threshold == -1 ? None : Optional<unsigned>(Threshold),
> + Count == -1 ? None : Optional<unsigned>(Count),
> + AllowPartial == -1 ? None : Optional<bool>(AllowPartial),
> + Runtime == -1 ? None : Optional<bool>(Runtime),
> + UpperBound == -1 ? None : Optional<bool>(UpperBound));
> }
>
> -Pass *llvm::createSimpleLoopUnrollPass() {
> - return llvm::createLoopUnrollPass(-1, -1, 0, 0, 0);
> +Pass *llvm::createSimpleLoopUnrollPass(int OptLevel) {
> + return llvm::createLoopUnrollPass(OptLevel, -1, -1, 0, 0, 0);
> }
>
> PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
> @@ -1153,10 +1156,10 @@ PreservedAnalyses LoopUnrollPass::run(Lo
> Optional<bool> AllowPartialParam, RuntimeParam, UpperBoundParam;
> if (!AllowPartialUnrolling)
> AllowPartialParam = RuntimeParam = UpperBoundParam = false;
> - bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, &AR.SE, AR.TTI, AR.AC, *ORE,
> - /*PreserveLCSSA*/ true, /*Count*/ None,
> - /*Threshold*/ None, AllowPartialParam,
> - RuntimeParam, UpperBoundParam);
> + bool Changed = tryToUnrollLoop(
> + &L, AR.DT, &AR.LI, &AR.SE, AR.TTI, AR.AC, *ORE,
> + /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
> + /*Threshold*/ None, AllowPartialParam, RuntimeParam, UpperBoundParam);
> if (!Changed)
> return PreservedAnalyses::all();
>
>
> Modified: llvm/trunk/test/Transforms/LoopVectorize/X86/metadata-enable.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/X86/metadata-enable.ll?rev=295538&r1=295537&r2=295538&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/LoopVectorize/X86/metadata-enable.ll (original)
> +++ llvm/trunk/test/Transforms/LoopVectorize/X86/metadata-enable.ll Fri Feb 17 21:46:51 2017
> @@ -1,13 +1,14 @@
> ; RUN: opt < %s -mcpu=corei7 -O1 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1
> ; RUN: opt < %s -mcpu=corei7 -O2 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O2
> -; RUN: opt < %s -mcpu=corei7 -O3 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3
> +; RUN: opt < %s -mcpu=corei7 -O3 -S -unroll-threshold=150 -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3
> +; RUN: opt < %s -mcpu=corei7 -O3 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DEFAULT
> ; RUN: opt < %s -mcpu=corei7 -Os -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=Os
> ; RUN: opt < %s -mcpu=corei7 -Oz -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=Oz
> ; RUN: opt < %s -mcpu=corei7 -O1 -vectorize-loops -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1VEC
> ; RUN: opt < %s -mcpu=corei7 -Oz -vectorize-loops -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=OzVEC
> ; RUN: opt < %s -mcpu=corei7 -O1 -loop-vectorize -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1VEC2
> ; RUN: opt < %s -mcpu=corei7 -Oz -loop-vectorize -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=OzVEC2
> -; RUN: opt < %s -mcpu=corei7 -O3 -disable-loop-vectorization -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DIS
> +; RUN: opt < %s -mcpu=corei7 -O3 -unroll-threshold=150 -disable-loop-vectorization -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DIS
>
> ; This file tests the llvm.loop.vectorize.enable metadata forcing
> ; vectorization even when optimization levels are too low, or when
> @@ -25,6 +26,9 @@ target triple = "x86_64-unknown-linux-gn
> ; O3-LABEL: @enabled(
> ; O3: store <4 x i32>
> ; O3: ret i32
> +; O3DEFAULT-LABEL: @enabled(
> +; O3DEFAULT: store <4 x i32>
> +; O3DEFAULT: ret i32
> ; Pragma always wins!
> ; O3DIS-LABEL: @enabled(
> ; O3DIS: store <4 x i32>
> @@ -77,6 +81,9 @@ for.end:
> ; O3-LABEL: @nopragma(
> ; O3: store <4 x i32>
> ; O3: ret i32
> +; O3DEFAULT-LABEL: @nopragma(
> +; O3DEFAULT: store <4 x i32>
> +; O3DEFAULT: ret i32
> ; O3DIS-LABEL: @nopragma(
> ; O3DIS-NOT: store <4 x i32>
> ; O3DIS: ret i32
> @@ -128,6 +135,9 @@ for.end:
> ; O3-LABEL: @disabled(
> ; O3-NOT: store <4 x i32>
> ; O3: ret i32
> +; O3DEFAULT-LABEL: @disabled(
> +; O3DEFAULT: store <4 x i32>
> +; O3DEFAULT: ret i32
> ; O3DIS-LABEL: @disabled(
> ; O3DIS-NOT: store <4 x i32>
> ; O3DIS: ret i32
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list