[llvm] r193891 - LoopVectorizer: Perform redundancy elimination on induction variables
Nadav Rotem
nrotem at apple.com
Fri Nov 1 16:33:15 PDT 2013
Thanks :)
On Nov 1, 2013, at 4:32 PM, Arnold Schwaighofer <aschwaighofer at apple.com> wrote:
> r193895
>
> On Nov 1, 2013, at 6:12 PM, Nadav Rotem <nrotem at apple.com> wrote:
>
>> Arnold, can you please extract this code into its own function?
>>
>>
>> On Nov 1, 2013, at 3:18 PM, Arnold Schwaighofer <aschwaighofer at apple.com> wrote:
>>
>>> Author: arnolds
>>> Date: Fri Nov 1 17:18:19 2013
>>> New Revision: 193891
>>>
>>> URL: http://llvm.org/viewvc/llvm-project?rev=193891&view=rev
>>> Log:
>>> LoopVectorizer: Perform redundancy elimination on induction variables
>>>
>>> When the loop vectorizer was part of the SCC inliner pass manager, GVN would
>>> run after the loop vectorizer, followed by instcombine. This way redundancies
>>> (multiple uses) were removed and instcombine could perform scalarization on the
>>> induction variables. Having moved the loop vectorizer to run later, we no longer
>>> run any form of redundancy elimination before we perform instcombine. This caused
>>> vectorized induction variables to survive that did not before.
>>>
>>> On a recent iMac this brings linpack back up from 6000 Mflops to 7000 Mflops.
>>>
>>> This should also help lpbench and paq8p.
>>>
>>> I ran a Release (without Asserts) build over the test-suite and did not see any
>>> negative impact on compile time.
>>>
>>> radar://15339680
>>>
>>> Modified:
>>> llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
>>> llvm/trunk/test/Transforms/LoopVectorize/global_alias.ll
>>> llvm/trunk/test/Transforms/LoopVectorize/induction.ll
>>> llvm/trunk/test/Transforms/LoopVectorize/induction_plus.ll
>>>
>>> Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=193891&r1=193890&r2=193891&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
>>> +++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Fri Nov 1 17:18:19 2013
>>> @@ -2272,8 +2272,41 @@ InnerLoopVectorizer::vectorizeLoop(LoopV
>>> (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, ReducedPartRdx);
>>> (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
>>> }// end of for each redux variable.
>>> -
>>> +
>>> fixLCSSAPHIs();
>>> +
>>> + // Perform simple cse.
>>> + SmallPtrSet<Instruction*, 16> Visited;
>>> + SmallVector<Instruction*, 16> ToRemove;
>>> + for (BasicBlock::iterator I = LoopVectorBody->begin(),
>>> + E = LoopVectorBody->end(); I != E; ++I) {
>>> + Instruction *In = I;
>>> +
>>> + if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In) &&
>>> + !isa<ShuffleVectorInst>(In) && !isa<GetElementPtrInst>(In))
>>> + continue;
>>> +
>>> + // Check if we can replace this instruction with any of the
>>> + // visited instructions.
>>> + for (SmallPtrSet<Instruction*, 16>::iterator v = Visited.begin(),
>>> + ve = Visited.end(); v != ve; ++v) {
>>> + if (In->isIdenticalTo(*v)) {
>>> + In->replaceAllUsesWith(*v);
>>> + ToRemove.push_back(In);
>>> + In = 0;
>>> + break;
>>> + }
>>> + }
>>> + if (In)
>>> + Visited.insert(In);
>>> +
>>> + }
>>> + // Erase all of the instructions that we RAUWed.
>>> + for (SmallVectorImpl<Instruction *>::iterator v = ToRemove.begin(),
>>> + ve = ToRemove.end(); v != ve; ++v) {
>>> + assert((*v)->getNumUses() == 0 && "Can't remove instructions with uses");
>>> + (*v)->eraseFromParent();
>>> + }
>>> }
>>>
>>> void InnerLoopVectorizer::fixLCSSAPHIs() {
>>>
>>> Modified: llvm/trunk/test/Transforms/LoopVectorize/global_alias.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/global_alias.ll?rev=193891&r1=193890&r2=193891&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/test/Transforms/LoopVectorize/global_alias.ll (original)
>>> +++ llvm/trunk/test/Transforms/LoopVectorize/global_alias.ll Fri Nov 1 17:18:19 2013
>>> @@ -336,9 +336,8 @@ for.end:
>>> ; return Foo.A[a];
>>> ; }
>>> ; CHECK-LABEL: define i32 @noAlias07(
>>> -; CHECK: sub nsw <4 x i32>
>>> +; CHECK: store <4 x i32>
>>> ; CHECK: ret
>>> -
>>> define i32 @noAlias07(i32 %a) #0 {
>>> entry:
>>> %a.addr = alloca i32, align 4
>>> @@ -552,7 +551,7 @@ for.end:
>>> ; return Bar.A[N][a];
>>> ; }
>>> ; CHECK-LABEL: define i32 @noAlias11(
>>> -; CHECK: sub nsw <4 x i32>
>>> +; CHECK: store <4 x i32>
>>> ; CHECK: ret
>>>
>>> define i32 @noAlias11(i32 %a) #0 {
>>> @@ -612,7 +611,7 @@ for.end:
>>> ; return Bar.A[N][a];
>>> ; }
>>> ; CHECK-LABEL: define i32 @noAlias12(
>>> -; CHECK: sub nsw <4 x i32>
>>> +; CHECK: store <4 x i32>
>>> ; CHECK: ret
>>>
>>> define i32 @noAlias12(i32 %a) #0 {
>>>
>>> Modified: llvm/trunk/test/Transforms/LoopVectorize/induction.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/induction.ll?rev=193891&r1=193890&r2=193891&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/test/Transforms/LoopVectorize/induction.ll (original)
>>> +++ llvm/trunk/test/Transforms/LoopVectorize/induction.ll Fri Nov 1 17:18:19 2013
>>> @@ -28,3 +28,41 @@ for.end:
>>> ret void
>>> }
>>>
>>> +; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND
>>> +
>>> +; Make sure we remove unneeded vectorization of induction variables.
>>> +; In order for instcombine to cleanup the vectorized induction variables that we
>>> +; create in the loop vectorizer we need to perform some form of redundancy
>>> +; elimination to get rid of multiple uses.
>>> +
>>> +; IND-LABEL: scalar_use
>>> +
>>> +; IND: br label %vector.body
>>> +; IND: vector.body:
>>> +; Vectorized induction variable.
>>> +; IND-NOT: insertelement <2 x i64>
>>> +; IND-NOT: shufflevector <2 x i64>
>>> +; IND: br {{.*}}, label %vector.body
>>> +
>>> +define void @scalar_use(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
>>> +entry:
>>> + br label %for.body
>>> +
>>> +for.body:
>>> + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
>>> + %ind.sum = add i64 %iv, %offset
>>> + %arr.idx = getelementptr inbounds float* %a, i64 %ind.sum
>>> + %l1 = load float* %arr.idx, align 4
>>> + %ind.sum2 = add i64 %iv, %offset2
>>> + %arr.idx2 = getelementptr inbounds float* %a, i64 %ind.sum2
>>> + %l2 = load float* %arr.idx2, align 4
>>> + %m = fmul fast float %b, %l2
>>> + %ad = fadd fast float %l1, %m
>>> + store float %ad, float* %arr.idx, align 4
>>> + %iv.next = add nuw nsw i64 %iv, 1
>>> + %exitcond = icmp eq i64 %iv.next, %n
>>> + br i1 %exitcond, label %loopexit, label %for.body
>>> +
>>> +loopexit:
>>> + ret void
>>> +}
>>>
>>> Modified: llvm/trunk/test/Transforms/LoopVectorize/induction_plus.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/induction_plus.ll?rev=193891&r1=193890&r2=193891&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/test/Transforms/LoopVectorize/induction_plus.ll (original)
>>> +++ llvm/trunk/test/Transforms/LoopVectorize/induction_plus.ll Fri Nov 1 17:18:19 2013
>>> @@ -6,8 +6,8 @@ target triple = "x86_64-apple-macosx10.8
>>> @array = common global [1024 x i32] zeroinitializer, align 16
>>>
>>> ;CHECK-LABEL: @array_at_plus_one(
>>> -;CHECK: trunc i64
>>> ;CHECK: add i64 %index, 12
>>> +;CHECK: trunc i64
>>> ;CHECK: ret i32
>>> define i32 @array_at_plus_one(i32 %n) nounwind uwtable ssp {
>>> %1 = icmp sgt i32 %n, 0
>>>
>>>
>>> _______________________________________________
>>> llvm-commits mailing list
>>> llvm-commits at cs.uiuc.edu
>>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>>
>
More information about the llvm-commits mailing list