[llvm] r200621 - LoopVectorizer: Enable unrolling of conditional stores and the load/store unrolling heuristic by default
Arnold Schwaighofer
aschwaighofer at apple.com
Sat Feb 1 19:12:35 PST 2014
Author: arnolds
Date: Sat Feb 1 21:12:34 2014
New Revision: 200621
URL: http://llvm.org/viewvc/llvm-project?rev=200621&view=rev
Log:
LoopVectorizer: Enable unrolling of conditional stores and the load/store
unrolling heuristic by default.
Benchmarking on x86_64 (thanks Chandler!) and ARM has shown that these options
speed up some benchmarks while not causing any interesting regressions.
Modified:
llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/trunk/test/Transforms/LoopVectorize/ARM/arm-unroll.ll
Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=200621&r1=200620&r2=200621&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Sat Feb 1 21:12:34 2014
@@ -180,16 +180,16 @@ static cl::opt<bool> LoopVectorizeWithBl
// Runtime unroll loops for load/store throughput.
static cl::opt<bool> EnableLoadStoreRuntimeUnroll(
- "enable-loadstore-runtime-unroll", cl::init(false), cl::Hidden,
+ "enable-loadstore-runtime-unroll", cl::init(true), cl::Hidden,
cl::desc("Enable runtime unrolling until load/store ports are saturated"));
/// The number of stores in a loop that are allowed to need predication.
static cl::opt<unsigned> NumberOfStoresToPredicate(
- "vectorize-num-stores-pred", cl::init(0), cl::Hidden,
+ "vectorize-num-stores-pred", cl::init(1), cl::Hidden,
cl::desc("Max number of stores to be predicated behind an if."));
static cl::opt<bool> EnableIndVarRegisterHeur(
- "enable-ind-var-reg-heur", cl::init(false), cl::Hidden,
+ "enable-ind-var-reg-heur", cl::init(true), cl::Hidden,
cl::desc("Count the induction variable only once when unrolling"));
static cl::opt<bool> EnableCondStoresVectorization(
Modified: llvm/trunk/test/Transforms/LoopVectorize/ARM/arm-unroll.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/ARM/arm-unroll.ll?rev=200621&r1=200620&r2=200621&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/ARM/arm-unroll.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/ARM/arm-unroll.ll Sat Feb 1 21:12:34 2014
@@ -47,6 +47,7 @@ define i32 @register_limit(i32* nocaptur
%sum.03 = phi i32 [ %7, %.lr.ph ], [ 0, %0 ]
%sum.04 = phi i32 [ %8, %.lr.ph ], [ 0, %0 ]
%sum.05 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ]
+ %sum.06 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
%2 = getelementptr inbounds i32* %A, i32 %i.02
%3 = load i32* %2, align 4
%4 = add nsw i32 %3, %sum.01
@@ -55,6 +56,7 @@ define i32 @register_limit(i32* nocaptur
%7 = add nsw i32 %3, %sum.03
%8 = add nsw i32 %3, %sum.04
%9 = add nsw i32 %3, %sum.05
+ %10 = add nsw i32 %3, %sum.05
%exitcond = icmp eq i32 %5, %n
br i1 %exitcond, label %._crit_edge, label %.lr.ph
@@ -64,5 +66,6 @@ define i32 @register_limit(i32* nocaptur
%sum.2.lcssa = phi i32 [ 0, %0 ], [ %7, %.lr.ph ]
%sum.4.lcssa = phi i32 [ 0, %0 ], [ %8, %.lr.ph ]
%sum.5.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ]
+ %sum.6.lcssa = phi i32 [ 0, %0 ], [ %10, %.lr.ph ]
ret i32 %sum.0.lcssa
}
More information about the llvm-commits mailing list