[PATCH] R600: Anti-dependencies better handled in TEX clause

Tom Stellard tom at stellard.net
Thu Jun 6 08:11:59 PDT 2013


On Thu, Jun 06, 2013 at 03:51:02PM +0200, Vincent Lejeune wrote:
> ---

Reviewed-by: Tom Stellard <thomas.stellard at amd.com>

>  lib/Target/R600/R600ControlFlowFinalizer.cpp | 10 ++++------
>  test/CodeGen/R600/tex-clause-antidep.ll      | 24 ++++++++++++++++++++++++
>  2 files changed, 28 insertions(+), 6 deletions(-)
>  create mode 100644 test/CodeGen/R600/tex-clause-antidep.ll
> 
> diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
> index f229613..385b5c5 100644
> --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
> +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
> @@ -110,7 +110,7 @@ private:
>    }
>  
>    bool isCompatibleWithClause(const MachineInstr *MI,
> -  std::set<unsigned> &DstRegs, std::set<unsigned> &SrcRegs) const {
> +      std::set<unsigned> &DstRegs) const {
>      unsigned DstMI, SrcMI;
>      for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
>          E = MI->operands_end(); I != E; ++I) {
> @@ -136,9 +136,7 @@ private:
>                &AMDGPU::R600_Reg128RegClass);
>        }
>      }
> -    if ((DstRegs.find(SrcMI) == DstRegs.end()) &&
> -        (SrcRegs.find(DstMI) == SrcRegs.end())) {
> -      SrcRegs.insert(SrcMI);
> +    if ((DstRegs.find(SrcMI) == DstRegs.end())) {
>        DstRegs.insert(DstMI);
>        return true;
>      } else
> @@ -152,7 +150,7 @@ private:
>      std::vector<MachineInstr *> ClauseContent;
>      unsigned AluInstCount = 0;
>      bool IsTex = TII->usesTextureCache(ClauseHead);
> -    std::set<unsigned> DstRegs, SrcRegs;
> +    std::set<unsigned> DstRegs;
>      for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
>        if (IsTrivialInst(I))
>          continue;
> @@ -161,7 +159,7 @@ private:
>        if ((IsTex && !TII->usesTextureCache(I)) ||
>            (!IsTex && !TII->usesVertexCache(I)))
>          break;
> -      if (!isCompatibleWithClause(I, DstRegs, SrcRegs))
> +      if (!isCompatibleWithClause(I, DstRegs))
>          break;
>        AluInstCount ++;
>        ClauseContent.push_back(I);
> diff --git a/test/CodeGen/R600/tex-clause-antidep.ll b/test/CodeGen/R600/tex-clause-antidep.ll
> new file mode 100644
> index 0000000..5979609
> --- /dev/null
> +++ b/test/CodeGen/R600/tex-clause-antidep.ll
> @@ -0,0 +1,24 @@
> +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
> +
> +;CHECK: TEX
> +;CHECK-NEXT: ALU
> +
> +define void @test() {
> +  %1 = call float @llvm.R600.load.input(i32 0)
> +  %2 = call float @llvm.R600.load.input(i32 1)
> +  %3 = call float @llvm.R600.load.input(i32 2)
> +  %4 = call float @llvm.R600.load.input(i32 3)
> +  %5 = insertelement <4 x float> undef, float %1, i32 0
> +  %6 = insertelement <4 x float> %5, float %2, i32 1
> +  %7 = insertelement <4 x float> %6, float %3, i32 2
> +  %8 = insertelement <4 x float> %7, float %4, i32 3
> +  %9 = call <4 x float> @llvm.R600.tex(<4 x float> %8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
> +  %10 = call <4 x float> @llvm.R600.tex(<4 x float> %8, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
> +  %11 = fadd <4 x float> %9, %10
> +  call void @llvm.R600.store.swizzle(<4 x float> %11, i32 0, i32 0)
> +  ret void
> +}
> +
> +declare float @llvm.R600.load.input(i32) readnone
> +declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone
> +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
> -- 
> 1.8.2.1
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits



More information about the llvm-commits mailing list