[PATCH] R600: Make sure to schedule AR register uses and defs in the same clause

Vincent Lejeune vljn at ovi.com
Sun Jun 2 15:32:29 PDT 2013


Only one whitespace issue (noted inline below); otherwise the patch is Reviewed-by: vljn at ovi.com


----- Mail original -----
> De : Tom Stellard <tom at stellard.net>
> À : llvm-commits at cs.uiuc.edu
> Cc : Tom Stellard <thomas.stellard at amd.com>
> Envoyé le : Vendredi 31 mai 2013 20h28
> Objet : [PATCH] R600: Make sure to schedule AR register uses and defs in the
	same clause
> 
> From: Tom Stellard <thomas.stellard at amd.com>
> 
> ---
> lib/Target/R600/R600InstrInfo.cpp        |  6 +++--
> lib/Target/R600/R600MachineScheduler.cpp | 37 ++++++++++++++++++++++++--
> lib/Target/R600/R600MachineScheduler.h   |  2 ++
> test/CodeGen/R600/stack-simple.ll        | 45 ++++++++++++++++++++++++++++++++
> 4 files changed, 86 insertions(+), 4 deletions(-)
> create mode 100644 test/CodeGen/R600/stack-simple.ll
> 
> diff --git a/lib/Target/R600/R600InstrInfo.cpp 
> b/lib/Target/R600/R600InstrInfo.cpp
> index 42e62a4..14808b5 100644
> --- a/lib/Target/R600/R600InstrInfo.cpp
> +++ b/lib/Target/R600/R600InstrInfo.cpp
> @@ -791,7 +791,8 @@ MachineInstrBuilder 
> R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
> 
>    MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
>                                        AddrReg, ValueReg)
> -                                      .addReg(AMDGPU::AR_X, 
> RegState::Implicit);
> +                                      .addReg(AMDGPU::AR_X,
> +                                           RegState::Implicit | 
> RegState::Kill);
>    setImmOperand(Mov, AMDGPU::OpName::DST_REL, 1);
>    return Mov;
> }
> @@ -808,7 +809,8 @@ MachineInstrBuilder 
> R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
>    MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
>                                        ValueReg,
>                                        AddrReg)
> -                                      .addReg(AMDGPU::AR_X, 
> RegState::Implicit);
> +                                      .addReg(AMDGPU::AR_X,
> +                                           RegState::Implicit | 
> RegState::Kill);
>    setImmOperand(Mov, AMDGPU::OpName::SRC0_REL, 1);
> 
>    return Mov;
> diff --git a/lib/Target/R600/R600MachineScheduler.cpp 
> b/lib/Target/R600/R600MachineScheduler.cpp
> index 8d61b8c..de40277 100644
> --- a/lib/Target/R600/R600MachineScheduler.cpp
> +++ b/lib/Target/R600/R600MachineScheduler.cpp
> @@ -59,14 +59,23 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
>    bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) 
> &&
>        (!Available[IDFetch].empty() || !Available[IDOther].empty());
> 
> -  if ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
> -      (!AllowSwitchFromAlu && CurInstKind == IDAlu)) {
> +  // We want to schedule AR defs as soon as possible to make sure they 
> aren't
> +  // put in a different ALU clause from their uses.
> +  if (!SU && !UnscheduledARDefs.empty()) {
> +      SU = UnscheduledARDefs[0];
> +      UnscheduledARDefs.erase(UnscheduledARDefs.begin());
> +      NextInstKind = IDAlu;
> +  }
> +
> +  if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
> +      (!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
>      // try to pick ALU
>      SU = pickAlu();
>      if (SU) {
>        if (CurEmitted >= InstKindLimit[IDAlu])
>          CurEmitted = 0;
>        NextInstKind = IDAlu;
> +

Extra whitespace: the blank line added just above is unnecessary and should be dropped.

>      }
>    }
> 
> @@ -84,6 +93,15 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
>        NextInstKind = IDOther;
>    }
> 
> +  // We want to schedule the AR uses as late as possible to make sure that
> +  // the AR defs have been released.
> +  if (!SU && !UnscheduledARUses.empty()) {
> +      SU = UnscheduledARUses[0];
> +      UnscheduledARUses.erase(UnscheduledARUses.begin());
> +      NextInstKind = IDAlu;
> +  }
> +
> +
>    DEBUG(
>        if (SU) {
>          dbgs() << " ** Pick node **\n";
> @@ -149,6 +167,21 @@ void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
>    DEBUG(dbgs() << "Bottom Releasing ";SU->dump(DAG););
> 
>    int IK = getInstKind(SU);
> +
> +  // Check for AR register defines
> +  for (MachineInstr::const_mop_iterator I = 
> SU->getInstr()->operands_begin(),
> +                                        E = 
> SU->getInstr()->operands_end();
> +                                        I != E; ++I) {
> +    if (I->isReg() && I->getReg() == AMDGPU::AR_X) {
> +      if (I->isDef()) {
> +        UnscheduledARDefs.push_back(SU);
> +      } else {
> +        UnscheduledARUses.push_back(SU);
> +      }
> +      return;
> +    }
> +  }
> +
>    // There is no export clause, we can schedule one as soon as its ready
>    if (IK == IDOther)
>      Available[IDOther].push_back(SU);
> diff --git a/lib/Target/R600/R600MachineScheduler.h 
> b/lib/Target/R600/R600MachineScheduler.h
> index 814ae9e..4dedf70 100644
> --- a/lib/Target/R600/R600MachineScheduler.h
> +++ b/lib/Target/R600/R600MachineScheduler.h
> @@ -52,6 +52,8 @@ class R600SchedStrategy : public MachineSchedStrategy {
> 
>    std::vector<SUnit *> Available[IDLast], Pending[IDLast];
>    std::vector<SUnit *> AvailableAlus[AluLast];
> +  std::vector<SUnit *> UnscheduledARDefs;
> +  std::vector<SUnit *> UnscheduledARUses;
> 
>    InstKind CurInstKind;
>    int CurEmitted;
> diff --git a/test/CodeGen/R600/stack-simple.ll 
> b/test/CodeGen/R600/stack-simple.ll
> new file mode 100644
> index 0000000..9bec183
> --- /dev/null
> +++ b/test/CodeGen/R600/stack-simple.ll
> @@ -0,0 +1,45 @@
> +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
> +
> +; CHECK: MOVA_INT
> +; CHECK-NOT: ALU clause
> +; CHECK: 0 + AR.x
> +; CHECK: MOVA_INT
> +; CHECK-NOT: ALU clause
> +; CHECK: 0 + AR.x
> +
> +; Function Attrs: nounwind
> +define void @stack_array_write(i32 addrspace(1)* nocapture %out, i32 
> addrspace(1)* nocapture %in) #0 {
> +entry:
> +  %stack = alloca [5 x i32], align 4
> +  %0 = load i32 addrspace(1)* %in, align 4, !tbaa !7
> +  %arrayidx1 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 %0
> +  store i32 4, i32* %arrayidx1, align 4, !tbaa !7
> +  %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %in, i32 1
> +  %1 = load i32 addrspace(1)* %arrayidx2, align 4, !tbaa !7
> +  %arrayidx3 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 %1
> +  store i32 5, i32* %arrayidx3, align 4, !tbaa !7
> +  %arrayidx10 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 0
> +  %2 = load i32* %arrayidx10, align 4, !tbaa !7
> +  store i32 %2, i32 addrspace(1)* %out, align 4, !tbaa !7
> +  %arrayidx12 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 1
> +  %3 = load i32* %arrayidx12, align 4, !tbaa !7
> +  %arrayidx13 = getelementptr inbounds i32 addrspace(1)* %out, i32 1
> +  store i32 %3, i32 addrspace(1)* %arrayidx13, align 4, !tbaa !7
> +  ret void
> +}
> +
> +attributes #0 = { nounwind "less-precise-fpmad"="false" 
> "no-frame-pointer-elim"="false" 
> "no-frame-pointer-elim-non-leaf"="false" 
> "no-infs-fp-math"="false" 
> "no-nans-fp-math"="false" 
> "unsafe-fp-math"="false" 
> "use-soft-float"="false" }
> +
> +!opencl.kernels = !{!0, !1, !2, !3, !4, !5, !6}
> +
> +!0 = metadata !{null}
> +!1 = metadata !{void (i32 addrspace(1)*, i32 addrspace(1)*)* 
> @stack_array_write}
> +!2 = metadata !{null}
> +!3 = metadata !{null}
> +!4 = metadata !{null}
> +!5 = metadata !{null}
> +!6 = metadata !{null}
> +!7 = metadata !{metadata !"int", metadata !8}
> +!8 = metadata !{metadata !"omnipotent char", metadata !9}
> +!9 = metadata !{metadata !"Simple C/C++ TBAA"}
> +
> -- 
> 1.7.11.4
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>   




More information about the llvm-commits mailing list