[llvm] r193891 - LoopVectorizer: Perform redundancy elimination on induction variables

Fri Nov 1 16:32:49 PDT 2013

r193895

On Nov 1, 2013, at 6:12 PM, Nadav Rotem <nrotem at apple.com> wrote:

> Arnold, can you please extract this code into its own function?
> 
> 
> On Nov 1, 2013, at 3:18 PM, Arnold Schwaighofer <aschwaighofer at apple.com> wrote:
> 
>> Author: arnolds
>> Date: Fri Nov  1 17:18:19 2013
>> New Revision: 193891
>> 
>> URL: http://llvm.org/viewvc/llvm-project?rev=193891&view=rev
>> Log:
>> LoopVectorizer: Perform redundancy elimination on induction variables
>> 
>> When the loop vectorizer was part of the SCC inliner pass manager, gvn would
>> run after the loop vectorizer, followed by instcombine. This way redundancies
>> (multiple uses) were removed and instcombine could perform scalarization on the
>> induction variables. Having moved the loop vectorizer to run later, we no longer
>> run any form of redundancy elimination before we perform instcombine. This
>> caused vectorized induction variables to survive that did not before.
>> 
>> On a recent iMac this helps linpack back from 6000Mflops to 7000Mflops.
>> 
>> This should also help lpbench and paq8p.
>> 
>> I ran a Release (without Asserts) build over the test-suite and did not see any
>> negative impact on compile time.
>> 
>> radar://15339680
>> 
>> Modified:
>>   llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
>>   llvm/trunk/test/Transforms/LoopVectorize/global_alias.ll
>>   llvm/trunk/test/Transforms/LoopVectorize/induction.ll
>>   llvm/trunk/test/Transforms/LoopVectorize/induction_plus.ll
>> 
>> Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=193891&r1=193890&r2=193891&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
>> +++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Fri Nov  1 17:18:19 2013
>> @@ -2272,8 +2272,41 @@ InnerLoopVectorizer::vectorizeLoop(LoopV
>>    (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, ReducedPartRdx);
>>    (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
>>  }// end of for each redux variable.
>> - 
>> +
>>  fixLCSSAPHIs();
>> +
>> +  // Perform simple cse.
>> +  SmallPtrSet<Instruction*, 16> Visited;
>> +  SmallVector<Instruction*, 16> ToRemove;
>> +  for (BasicBlock::iterator I = LoopVectorBody->begin(),
>> +       E = LoopVectorBody->end(); I != E; ++I) {
>> +      Instruction *In = I;
>> +
>> +      if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In) &&
>> +          !isa<ShuffleVectorInst>(In) && !isa<GetElementPtrInst>(In))
>> +        continue;
>> +
>> +      // Check if we can replace this instruction with any of the
>> +      // visited instructions.
>> +      for (SmallPtrSet<Instruction*, 16>::iterator v = Visited.begin(),
>> +           ve = Visited.end(); v != ve; ++v) {
>> +        if (In->isIdenticalTo(*v)) {
>> +          In->replaceAllUsesWith(*v);
>> +          ToRemove.push_back(In);
>> +          In = 0;
>> +          break;
>> +        }
>> +      }
>> +      if (In)
>> +        Visited.insert(In);
>> +
>> +  }
>> +  // Erase all of the instructions that we RAUWed.
>> +  for (SmallVectorImpl<Instruction *>::iterator v = ToRemove.begin(),
>> +       ve = ToRemove.end(); v != ve; ++v) {
>> +    assert((*v)->getNumUses() == 0 && "Can't remove instructions with uses");
>> +    (*v)->eraseFromParent();
>> +  }
>> }
>> 
>> void InnerLoopVectorizer::fixLCSSAPHIs() {
>> 
>> Modified: llvm/trunk/test/Transforms/LoopVectorize/global_alias.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/global_alias.ll?rev=193891&r1=193890&r2=193891&view=diff
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/LoopVectorize/global_alias.ll (original)
>> +++ llvm/trunk/test/Transforms/LoopVectorize/global_alias.ll Fri Nov  1 17:18:19 2013
>> @@ -336,9 +336,8 @@ for.end:
>> ;   return Foo.A[a];
>> ; }
>> ; CHECK-LABEL: define i32 @noAlias07(
>> -; CHECK: sub nsw <4 x i32>
>> +; CHECK: store <4 x i32>
>> ; CHECK: ret
>> -
>> define i32 @noAlias07(i32 %a) #0 {
>> entry:
>>  %a.addr = alloca i32, align 4
>> @@ -552,7 +551,7 @@ for.end:
>> ;   return Bar.A[N][a];
>> ; }
>> ; CHECK-LABEL: define i32 @noAlias11(
>> -; CHECK: sub nsw <4 x i32>
>> +; CHECK: store <4 x i32>
>> ; CHECK: ret
>> 
>> define i32 @noAlias11(i32 %a) #0 {
>> @@ -612,7 +611,7 @@ for.end:
>> ;   return Bar.A[N][a];
>> ; }
>> ; CHECK-LABEL: define i32 @noAlias12(
>> -; CHECK: sub nsw <4 x i32>
>> +; CHECK: store <4 x i32>
>> ; CHECK: ret
>> 
>> define i32 @noAlias12(i32 %a) #0 {
>> 
>> Modified: llvm/trunk/test/Transforms/LoopVectorize/induction.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/induction.ll?rev=193891&r1=193890&r2=193891&view=diff
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/LoopVectorize/induction.ll (original)
>> +++ llvm/trunk/test/Transforms/LoopVectorize/induction.ll Fri Nov  1 17:18:19 2013
>> @@ -28,3 +28,41 @@ for.end:
>>  ret void
>> }
>> 
>> +; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND
>> +
>> +; Make sure we remove unneeded vectorization of induction variables.
>> +; In order for instcombine to cleanup the vectorized induction variables that we
>> +; create in the loop vectorizer we need to perform some form of redundancy
>> +; elimination to get rid of multiple uses.
>> +
>> +; IND-LABEL: scalar_use
>> +
>> +; IND:     br label %vector.body
>> +; IND:     vector.body:
>> +;   Vectorized induction variable.
>> +; IND-NOT:  insertelement <2 x i64>
>> +; IND-NOT:  shufflevector <2 x i64>
>> +; IND:     br {{.*}}, label %vector.body
>> +
>> +define void @scalar_use(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
>> +entry:
>> +  br label %for.body
>> +
>> +for.body:
>> +  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
>> +  %ind.sum = add i64 %iv, %offset
>> +  %arr.idx = getelementptr inbounds float* %a, i64 %ind.sum
>> +  %l1 = load float* %arr.idx, align 4
>> +  %ind.sum2 = add i64 %iv, %offset2
>> +  %arr.idx2 = getelementptr inbounds float* %a, i64 %ind.sum2
>> +  %l2 = load float* %arr.idx2, align 4
>> +  %m = fmul fast float %b, %l2
>> +  %ad = fadd fast float %l1, %m
>> +  store float %ad, float* %arr.idx, align 4
>> +  %iv.next = add nuw nsw i64 %iv, 1
>> +  %exitcond = icmp eq i64 %iv.next, %n
>> +  br i1 %exitcond, label %loopexit, label %for.body
>> +
>> +loopexit:
>> +  ret void
>> +}
>> 
>> Modified: llvm/trunk/test/Transforms/LoopVectorize/induction_plus.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/induction_plus.ll?rev=193891&r1=193890&r2=193891&view=diff
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/LoopVectorize/induction_plus.ll (original)
>> +++ llvm/trunk/test/Transforms/LoopVectorize/induction_plus.ll Fri Nov  1 17:18:19 2013
>> @@ -6,8 +6,8 @@ target triple = "x86_64-apple-macosx10.8
>> @array = common global [1024 x i32] zeroinitializer, align 16
>> 
>> ;CHECK-LABEL: @array_at_plus_one(
>> -;CHECK: trunc i64
>> ;CHECK: add i64 %index, 12
>> +;CHECK: trunc i64
>> ;CHECK: ret i32
>> define i32 @array_at_plus_one(i32 %n) nounwind uwtable ssp {
>>  %1 = icmp sgt i32 %n, 0
>> 
>> 
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>