[PATCH] Make sink pass able to sink instructions in block-loop-block situation.

Tue Oct 29 09:48:56 PDT 2013

Ping.

On Mon, Oct 21, 2013 at 06:02:50AM -0700, Vincent Lejeune wrote:
> I noticed that I had added an unneeded include; this newer patch removes it.
> 
> I tested the changes with the lit tests for all targets and ran llvm-test-suite
> on Fedora 19 x86 with no regressions.
> 
> Can I have a review, please?
> 
> 
> Vincent
> 
> 
> 
> 
> ----- Mail original -----
> > De : Vincent Lejeune <vljn at ovi.com>
> > À : llvm-commits at cs.uiuc.edu
> > Cc : Vincent Lejeune <vljn at ovi.com>
> > Envoyé le : Lundi 14 octobre 2013 20h04
> > Objet : [PATCH] Make sink pass able to sink instructions in block-loop-block situation.
> > 
> > This patch makes the Sink pass able to sink instructions from blocks with a
> > single successor and to consider all dominated blocks as sink candidates.
> > 
> > It allows reducing the length of register live ranges in cases like:
> > %BB1
> >   .. Lot of defs
> >   br label %LOOP
> > %LOOP
> >   ..
> >   br i1 %indvar, label %LOOP, label %BB2
> > %BB2
> >   .. Uses of value from %BB1
> > ---
> > lib/Transforms/Scalar/Sink.cpp             | 38 +++++++++++++++++------------
> > test/Transforms/Sink/aggressive-sinking.ll | 39 ++++++++++++++++++++++++++++++
> > 2 files changed, 62 insertions(+), 15 deletions(-)
> > create mode 100644 test/Transforms/Sink/aggressive-sinking.ll
> > 
> > diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
> > index d4595bb..ce2af39 100644
> > --- a/lib/Transforms/Scalar/Sink.cpp
> > +++ b/lib/Transforms/Scalar/Sink.cpp
> > @@ -24,6 +24,7 @@
> > #include "llvm/Support/CFG.h"
> > #include "llvm/Support/Debug.h"
> > #include "llvm/Support/raw_ostream.h"
> > +#include "llvm/ADT/DepthFirstIterator.h"
> > using namespace llvm;
> > 
> > STATISTIC(NumSunk, "Number of instructions sunk");
> > @@ -54,6 +55,7 @@ namespace {
> >      }
> >    private:
> >      bool ProcessBlock(BasicBlock &BB);
> > +    BasicBlock *ParseSinkToCandidates(Instruction *Inst, BasicBlock *BB) const;
> >      bool SinkInstruction(Instruction *I, SmallPtrSet<Instruction *, 8> 
> > &Stores);
> >      bool AllUsesDominatedByBlock(Instruction *Inst, BasicBlock *BB) const;
> >      bool IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo) const;
> > @@ -117,8 +119,7 @@ bool Sinking::runOnFunction(Function &F) {
> > }
> > 
> > bool Sinking::ProcessBlock(BasicBlock &BB) {
> > -  // Can't sink anything out of a block that has less than two successors.
> > -  if (BB.getTerminator()->getNumSuccessors() <= 1 || BB.empty()) return 
> > false;
> > +  if (BB.empty()) return false;
> > 
> >    // Don't bother sinking code out of unreachable blocks. In addition to 
> > being
> >    // unprofitable, it can also lead to infinite looping, because in an
> > @@ -214,6 +215,25 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst,
> >    return AllUsesDominatedByBlock(Inst, SuccToSinkTo);
> > }
> > 
> > +/// ParseSinkToCandidates - Evaluate all block dominated by BB as sink
> > +/// destination.
> > +BasicBlock *Sinking::ParseSinkToCandidates(Instruction *Inst,
> > +                                           BasicBlock *BB) const {
> > +  DomTreeNode *DTN = DT->getNode(BB);
> > +  for (DomTreeNode::iterator I = DTN->begin(), E = DTN->end(); I != E; 
> > ++I) {
> > +    BasicBlock *Candidate = (*I)->getBlock();
> > +    if (IsAcceptableTarget(Inst, Candidate))
> > +      return Candidate;
> > +    // If some uses are not dominated by Candidate, we can prune the dominator
> > +    // tree beneath.
> > +    if (!AllUsesDominatedByBlock(Inst, Candidate))
> > +      continue;
> > +    if (BasicBlock *ChildCandidate = ParseSinkToCandidates(Inst, Candidate))
> > +      return ChildCandidate;
> > +  }
> > +  return NULL;
> > +}
> > +
> > /// SinkInstruction - Determine whether it is safe to sink the specified machine
> > /// instruction out of its current block into a successor.
> > bool Sinking::SinkInstruction(Instruction *Inst,
> > @@ -232,19 +252,7 @@ bool Sinking::SinkInstruction(Instruction *Inst,
> > 
> >    // SuccToSinkTo - This is the successor to sink this instruction to, once we
> >    // decide.
> > -  BasicBlock *SuccToSinkTo = 0;
> > -
> > -  // Instructions can only be sunk if all their uses are in blocks
> > -  // dominated by one of the successors.
> > -  // Look at all the postdominators and see if we can sink it in one.
> > -  DomTreeNode *DTN = DT->getNode(Inst->getParent());
> > -  for (DomTreeNode::iterator I = DTN->begin(), E = DTN->end();
> > -      I != E && SuccToSinkTo == 0; ++I) {
> > -    BasicBlock *Candidate = (*I)->getBlock();
> > -    if ((*I)->getIDom()->getBlock() == Inst->getParent() &&
> > -        IsAcceptableTarget(Inst, Candidate))
> > -      SuccToSinkTo = Candidate;
> > -  }
> > +  BasicBlock *SuccToSinkTo = ParseSinkToCandidates(Inst, Inst->getParent());
> > 
> >    // If no suitable postdominator was found, look at all the successors and
> >    // decide which one we should sink to, if any.
> > diff --git a/test/Transforms/Sink/aggressive-sinking.ll 
> > b/test/Transforms/Sink/aggressive-sinking.ll
> > new file mode 100644
> > index 0000000..08e49d6
> > --- /dev/null
> > +++ b/test/Transforms/Sink/aggressive-sinking.ll
> > @@ -0,0 +1,39 @@
> > +; RUN: opt < %s -basicaa -sink -S | FileCheck %s
> > +
> > +;CHECK-LABEL: @main
> > +;CHECK-LABEL: after_loop
> > +;CHECK: getelementptr
> > + 
> > +define void @main([32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x 
> > i8>] addrspace(2)* byval) {
> > +main_body:
> > +  %2 = getelementptr [32 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
> > +  %3 = load <16 x i8> addrspace(2)* %2
> > +  %4 = getelementptr [16 x <32 x i8>] addrspace(2)* %1, i64 0, i32 0
> > +  %5 = load <32 x i8> addrspace(2)* %4
> > +  br label %loop
> > + 
> > +loop:                                             ; preds = %loop, %main_body
> > +  %indvar = phi i32 [0, %main_body], [%indvarincr, %loop]
> > +  %temp = phi float [1.0, %main_body], [%tempincr, %loop]
> > +  %indvarincr = add i32 %indvar, 1
> > +  %tempincr = fadd float %temp, 1.0
> > +  %6 = icmp ne i32 %indvar, 100
> > +  br i1 %6, label %after_loop, label %loop
> > + 
> > +after_loop:                                            ; preds = %after_loop
> > +  %7 = phi float [%temp, %loop]
> > +  %8 = bitcast float %7 to i32
> > +  %9 = insertelement <2 x i32> undef, i32 %8, i32 0
> > +  %10 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %9, <32 
> > x i8> %5, <16 x i8> %3, i32 0)
> > +  %11 = extractelement <4 x float> %10, i32 0
> > +  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %11, 
> > float %11, float %11, float %11)
> > +  ret void
> > + 
> > +}
> > +
> > +; Function Attrs: nounwind readnone
> > +declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x 
> > i8>, <16 x i8>, i32) #1
> > +
> > +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, 
> > float)
> > +
> > +attributes #1 = { nounwind readnone }
> > -- 
> > 1.8.3.1
> > 

> From be881a2378ce49c87ada565309f671d061c9b12f Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Wed, 9 Oct 2013 19:39:44 +0200
> Subject: [PATCH] Make sink pass able to sink instructions in block-loop-block
>  situation.
> 
> This patch makes the Sink pass able to sink instructions from blocks with a
> single successor and to consider all dominated blocks as sink candidates.
> 
> It allows reducing the length of register live ranges in cases like:
> %BB1
>   .. Lot of defs
>   br label %LOOP
> %LOOP
>   ..
>   br i1 %indvar, label %LOOP, label %BB2
> %BB2
>   .. Uses of value from %BB1
> ---
>  lib/Transforms/Scalar/Sink.cpp             | 37 ++++++++++++++++------------
>  test/Transforms/Sink/aggressive-sinking.ll | 39 ++++++++++++++++++++++++++++++
>  2 files changed, 61 insertions(+), 15 deletions(-)
>  create mode 100644 test/Transforms/Sink/aggressive-sinking.ll
> 
> diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
> index d4595bb..005633d 100644
> --- a/lib/Transforms/Scalar/Sink.cpp
> +++ b/lib/Transforms/Scalar/Sink.cpp
> @@ -54,6 +54,7 @@ namespace {
>      }
>    private:
>      bool ProcessBlock(BasicBlock &BB);
> +    BasicBlock *ParseSinkToCandidates(Instruction *Inst, BasicBlock *BB) const;
>      bool SinkInstruction(Instruction *I, SmallPtrSet<Instruction *, 8> &Stores);
>      bool AllUsesDominatedByBlock(Instruction *Inst, BasicBlock *BB) const;
>      bool IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo) const;
> @@ -117,8 +118,7 @@ bool Sinking::runOnFunction(Function &F) {
>  }
>  
>  bool Sinking::ProcessBlock(BasicBlock &BB) {
> -  // Can't sink anything out of a block that has less than two successors.
> -  if (BB.getTerminator()->getNumSuccessors() <= 1 || BB.empty()) return false;
> +  if (BB.empty()) return false;
>  
>    // Don't bother sinking code out of unreachable blocks. In addition to being
>    // unprofitable, it can also lead to infinite looping, because in an
> @@ -214,6 +214,25 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst,
>    return AllUsesDominatedByBlock(Inst, SuccToSinkTo);
>  }
>  
> +/// ParseSinkToCandidates - Evaluate all block dominated by BB as sink
> +/// destination.
> +BasicBlock *Sinking::ParseSinkToCandidates(Instruction *Inst,
> +                                           BasicBlock *BB) const {
> +  DomTreeNode *DTN = DT->getNode(BB);
> +  for (DomTreeNode::iterator I = DTN->begin(), E = DTN->end(); I != E; ++I) {
> +    BasicBlock *Candidate = (*I)->getBlock();
> +    if (IsAcceptableTarget(Inst, Candidate))
> +      return Candidate;
> +    // If some uses are not dominated by Candidate, we can prune the dominator
> +    // tree beneath.
> +    if (!AllUsesDominatedByBlock(Inst, Candidate))
> +      continue;
> +    if (BasicBlock *ChildCandidate = ParseSinkToCandidates(Inst, Candidate))
> +      return ChildCandidate;
> +  }
> +  return NULL;
> +}
> +
>  /// SinkInstruction - Determine whether it is safe to sink the specified machine
>  /// instruction out of its current block into a successor.
>  bool Sinking::SinkInstruction(Instruction *Inst,
> @@ -232,19 +251,7 @@ bool Sinking::SinkInstruction(Instruction *Inst,
>  
>    // SuccToSinkTo - This is the successor to sink this instruction to, once we
>    // decide.
> -  BasicBlock *SuccToSinkTo = 0;
> -
> -  // Instructions can only be sunk if all their uses are in blocks
> -  // dominated by one of the successors.
> -  // Look at all the postdominators and see if we can sink it in one.
> -  DomTreeNode *DTN = DT->getNode(Inst->getParent());
> -  for (DomTreeNode::iterator I = DTN->begin(), E = DTN->end();
> -      I != E && SuccToSinkTo == 0; ++I) {
> -    BasicBlock *Candidate = (*I)->getBlock();
> -    if ((*I)->getIDom()->getBlock() == Inst->getParent() &&
> -        IsAcceptableTarget(Inst, Candidate))
> -      SuccToSinkTo = Candidate;
> -  }
> +  BasicBlock *SuccToSinkTo = ParseSinkToCandidates(Inst, Inst->getParent());
>  
>    // If no suitable postdominator was found, look at all the successors and
>    // decide which one we should sink to, if any.
> diff --git a/test/Transforms/Sink/aggressive-sinking.ll b/test/Transforms/Sink/aggressive-sinking.ll
> new file mode 100644
> index 0000000..08e49d6
> --- /dev/null
> +++ b/test/Transforms/Sink/aggressive-sinking.ll
> @@ -0,0 +1,39 @@
> +; RUN: opt < %s -basicaa -sink -S | FileCheck %s
> +
> +;CHECK-LABEL: @main
> +;CHECK-LABEL: after_loop
> +;CHECK: getelementptr
> + 
> +define void @main([32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval) {
> +main_body:
> +  %2 = getelementptr [32 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
> +  %3 = load <16 x i8> addrspace(2)* %2
> +  %4 = getelementptr [16 x <32 x i8>] addrspace(2)* %1, i64 0, i32 0
> +  %5 = load <32 x i8> addrspace(2)* %4
> +  br label %loop
> + 
> +loop:                                             ; preds = %loop, %main_body
> +  %indvar = phi i32 [0, %main_body], [%indvarincr, %loop]
> +  %temp = phi float [1.0, %main_body], [%tempincr, %loop]
> +  %indvarincr = add i32 %indvar, 1
> +  %tempincr = fadd float %temp, 1.0
> +  %6 = icmp ne i32 %indvar, 100
> +  br i1 %6, label %after_loop, label %loop
> + 
> +after_loop:                                            ; preds = %after_loop
> +  %7 = phi float [%temp, %loop]
> +  %8 = bitcast float %7 to i32
> +  %9 = insertelement <2 x i32> undef, i32 %8, i32 0
> +  %10 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %9, <32 x i8> %5, <16 x i8> %3, i32 0)
> +  %11 = extractelement <4 x float> %10, i32 0
> +  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %11, float %11, float %11, float %11)
> +  ret void
> + 
> +}
> +
> +; Function Attrs: nounwind readnone
> +declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
> +
> +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
> +
> +attributes #1 = { nounwind readnone }
> -- 
> 1.8.3.1
> 

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits